def test_component():
    x = load_sparse_sample()
    c1, cs1 = bct.get_components(x)
    assert np.max(c1) == 19
    assert np.max(cs1) == 72

    try:
        import networkx
        # force the pure-numpy code path even when networkx is available
        c2, cs2 = bct.get_components(x, no_depend=True)
        assert np.max(c2) == 19
        assert np.max(cs2) == 72
    except ImportError:
        pass

    assert bct.number_of_components(x) == 19
def test_component():
    x = load_sparse_sample()
    c1, cs1 = bct.get_components(x)
    print(np.max(c1), 19)
    assert np.max(c1) == 19
    print(np.max(cs1), 72)
    assert np.max(cs1) == 72
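# Minimal sketch of what the tests above exercise: bct.get_components returns
# a 1-indexed component label for every node plus the node count of each
# component. The toy matrix below is an assumption for illustration only; it
# is not the fixture loaded by load_sparse_sample.
import numpy as np
import bct

# Two components: nodes {0, 1, 2} form a path; node 3 is isolated.
w = np.array([[0, 1, 0, 0],
              [1, 0, 1, 0],
              [0, 1, 0, 0],
              [0, 0, 0, 0]])

comps, comp_sizes = bct.get_components(w)
print(comps)       # component label per node, e.g. [1 1 1 2]
print(comp_sizes)  # size of each component in nodes, e.g. [3 1]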
def getBiggestComponent(pairwise_measure_matrix):
    """
    Get the biggest component of the weighted adjacency matrix.

    Arguments:
        pairwise_measure_matrix, numpy.ndarray

    Returns:
        biggest_comp, the biggest component of the weighted adjacency matrix
        keep_indices, the indices of the nodes in the biggest component
        comp_assign, integers indicating to which component each node was
            assigned
        comp_size, the size of each component

    Note: Requires bct, the Python version of the Brain Connectivity Toolbox
        https://github.com/aestrivex/bctpy
    """
    adjacency_matrix = (pairwise_measure_matrix > 0).astype(int)
    comp_assign, comp_size = bct.get_components(adjacency_matrix)
    # Component labels are 1-indexed, so the label of the largest component
    # is argmax + 1.
    keep_indices = np.nonzero(comp_assign == comp_size.argmax() + 1)[0]
    biggest_comp = pairwise_measure_matrix[keep_indices][:, keep_indices]
    np.fill_diagonal(biggest_comp, 0)
    return biggest_comp, keep_indices, comp_assign, comp_size
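# Hedged usage sketch for getBiggestComponent. The random matrix and the
# sparsification cutoff are assumptions for illustration, not data from the
# original repository.
import numpy as np

rng = np.random.default_rng(0)
measure = rng.random((10, 10))
measure = (measure + measure.T) / 2   # symmetrize
measure[measure < 0.8] = 0            # sparsify so several components appear

biggest, keep_idx, assign, sizes = getBiggestComponent(measure)
print(biggest.shape)  # submatrix restricted to the largest component
print(keep_idx)       # node indices retained
print(sizes)          # node count of every component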
def nbs_bct(x, y, thresh, k=1000, tail='both', paired=False, verbose=False):
    '''
    Performs the NBS for populations X and Y for a t-statistic threshold of
    alpha.

    Parameters
    ----------
    x : NxNxP np.ndarray
        matrix representing the first population with P subjects. must be
        symmetric.
    y : NxNxQ np.ndarray
        matrix representing the second population with Q subjects. Q need not
        equal P. must be symmetric.
    thresh : float
        minimum t-value used as threshold
    k : int
        number of permutations used to estimate the empirical null
        distribution
    tail : {'left', 'right', 'both'}
        enables specification of particular alternative hypothesis
        'left' : mean population of X < mean population of Y
        'right' : mean population of Y < mean population of X
        'both' : means are unequal (default)
    paired : bool
        use paired sample t-test instead of population t-test. requires both
        subject populations to have equal N. default value = False
    verbose : bool
        print some extra information each iteration. default value = False

    Returns
    -------
    pval : Cx1 np.ndarray
        A vector of corrected p-values for each component of the networks
        identified. If at least one p-value is less than alpha, the omnibus
        null hypothesis can be rejected at alpha significance. The null
        hypothesis is that the value of the connectivity from each edge has
        equal mean across the two populations.
    adj : IxIxC np.ndarray
        an adjacency matrix identifying the edges comprising each component.
        edges are assigned indexed values.
    null : Kx1 np.ndarray
        A vector of K samples from the null distribution of maximal component
        size.

    Notes
    -----
    ALGORITHM DESCRIPTION
    The NBS is a nonparametric statistical test used to isolate the
    components of an N x N undirected connectivity matrix that differ
    significantly between two distinct populations. Each element of the
    connectivity matrix stores a connectivity value and each member of the
    two populations possesses a distinct connectivity matrix. A component of
    a connectivity matrix is defined as a set of interconnected edges.

    The NBS is essentially a procedure to control the family-wise error rate,
    in the weak sense, when the null hypothesis is tested independently at
    each of the N(N-1)/2 edges comprising the undirected connectivity matrix.
    The NBS can provide greater statistical power than conventional
    procedures for controlling the family-wise error rate, such as the false
    discovery rate, if the set of edges at which the null hypothesis is
    rejected constitutes a large component or components.

    The NBS comprises four steps:
    1. Perform a two-sample T-test at each edge independently to test the
       hypothesis that the values of connectivity between the two populations
       come from distributions with equal means.
    2. Threshold the T-statistic available at each edge to form a set of
       suprathreshold edges.
    3. Identify any components in the adjacency matrix defined by the set of
       suprathreshold edges. These are referred to as observed components.
       Compute the size of each observed component identified; that is, the
       number of edges it comprises.
    4. Repeat K times steps 1-3, each time randomly permuting members of the
       two populations and storing the size of the largest component
       identified for each permutation. This yields an empirical estimate of
       the null distribution of maximal component size. A corrected p-value
       for each observed component is then calculated using this null
       distribution.

    [1] Zalesky A, Fornito A, Bullmore ET (2010) Network-based statistic:
        Identifying differences in brain networks. NeuroImage.
        10.1016/j.neuroimage.2010.06.041
    '''

    def ttest2_stat_only(x, y, tail):
        t = np.mean(x) - np.mean(y)
        n1, n2 = len(x), len(y)
        # pooled standard deviation from unbiased sample variances
        s = np.sqrt(((n1 - 1) * np.var(x, ddof=1)
                     + (n2 - 1) * np.var(y, ddof=1)) / (n1 + n2 - 2))
        denom = s * np.sqrt(1 / n1 + 1 / n2)
        if denom == 0:
            return 0
        if tail == 'both':
            return np.abs(t / denom)
        if tail == 'left':
            return -t / denom
        else:
            return t / denom

    def ttest_paired_stat_only(A, B, tail):
        n = len(A - B)
        df = n - 1
        sample_ss = np.sum((A - B)**2) - np.sum(A - B)**2 / n
        unbiased_std = np.sqrt(sample_ss / (n - 1))
        z = np.mean(A - B) / unbiased_std
        t = z * np.sqrt(n)
        if tail == 'both':
            return np.abs(t)
        if tail == 'left':
            return -t
        else:
            return t

    if tail not in ('both', 'left', 'right'):
        raise BCTParamError('Tail must be both, left, right')

    ix, jx, nx = x.shape
    iy, jy, ny = y.shape

    if not ix == jx == iy == jy:
        raise BCTParamError('Population matrices are of inconsistent size')
    else:
        n = ix

    if paired and nx != ny:
        raise BCTParamError('Population matrices must be an equal size')

    # only consider upper triangular edges
    ixes = np.where(np.triu(np.ones((n, n)), 1))

    # number of edges
    m = np.size(ixes, axis=1)

    # vectorize connectivity matrices for speed
    xmat, ymat = np.zeros((m, nx)), np.zeros((m, ny))

    for i in range(nx):
        xmat[:, i] = x[:, :, i][ixes].squeeze()
    for i in range(ny):
        ymat[:, i] = y[:, :, i][ixes].squeeze()
    del x, y

    # perform t-test at each edge
    t_stat = np.zeros((m,))
    for i in range(m):
        if paired:
            t_stat[i] = ttest_paired_stat_only(xmat[i, :], ymat[i, :], tail)
        else:
            t_stat[i] = ttest2_stat_only(xmat[i, :], ymat[i, :], tail)

    # threshold
    ind_t, = np.where(t_stat > thresh)

    if len(ind_t) == 0:
        raise BCTParamError("Unsuitable threshold")

    # suprathreshold adjacency matrix
    adj = np.zeros((n, n))
    adj[(ixes[0][ind_t], ixes[1][ind_t])] = 1
    adj = adj + adj.T

    a, sz = get_components(adj)

    # convert size from nodes to number of edges; only consider components
    # comprising more than one node (i.e. at least one edge)
    ind_sz, = np.where(sz > 1)
    ind_sz += 1
    nr_components = np.size(ind_sz)
    sz_links = np.zeros((nr_components,))
    for i in range(nr_components):
        nodes, = np.where(ind_sz[i] == a)
        sz_links[i] = np.sum(adj[np.ix_(nodes, nodes)]) / 2
        adj[np.ix_(nodes, nodes)] *= (i + 2)

    # subtract 1 to delete any edges not comprising a component
    adj[np.where(adj)] -= 1

    if np.size(sz_links):
        max_sz = np.max(sz_links)
    else:
        raise BCTParamError('True matrix is degenerate')
    print('max component size is %i' % max_sz)

    # estimate the empirical null distribution of maximum component size by
    # generating k independent permutations
    print('estimating null distribution with %i permutations' % k)

    null = np.zeros((k,))
    hit = 0
    for u in range(k):
        # randomize
        if paired:
            # randomly flip the sign of each subject's paired difference
            indperm = np.sign(0.5 - np.random.rand(1, nx))
            d = np.hstack((xmat, ymat)) * np.hstack((indperm, indperm))
        else:
            d = np.hstack((xmat, ymat))[:, np.random.permutation(nx + ny)]

        t_stat_perm = np.zeros((m,))
        for i in range(m):
            if paired:
                t_stat_perm[i] = ttest_paired_stat_only(
                    d[i, :nx], d[i, -nx:], tail)
            else:
                t_stat_perm[i] = ttest2_stat_only(d[i, :nx], d[i, -ny:], tail)

        ind_t, = np.where(t_stat_perm > thresh)

        adj_perm = np.zeros((n, n))
        adj_perm[(ixes[0][ind_t], ixes[1][ind_t])] = 1
        adj_perm = adj_perm + adj_perm.T

        a, sz = get_components(adj_perm)

        ind_sz, = np.where(sz > 1)
        ind_sz += 1
        nr_components_perm = np.size(ind_sz)
        sz_links_perm = np.zeros((nr_components_perm))
        for i in range(nr_components_perm):
            nodes, = np.where(ind_sz[i] == a)
            sz_links_perm[i] = np.sum(adj_perm[np.ix_(nodes, nodes)]) / 2

        if np.size(sz_links_perm):
            null[u] = np.max(sz_links_perm)
        else:
            null[u] = 0

        # compare to the true dataset
        if null[u] >= max_sz:
            hit += 1

        if verbose:
            print('permutation %i of %i. Permutation max is %s. Observed max'
                  ' is %s. P-val estimate is %.3f'
                  % (u, k, null[u], max_sz, hit / (u + 1)))
        elif (u % (k // 10) == 0 or u == k - 1):
            print('permutation %i of %i. p-value so far is %.3f'
                  % (u, k, hit / (u + 1)))

    pvals = np.zeros((nr_components,))
    # calculate p-vals
    for i in range(nr_components):
        pvals[i] = np.size(np.where(null >= sz_links[i])) / k

    return pvals, adj, null
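# Hedged usage sketch: calling the function above (exposed as bct.nbs_bct in
# bctpy) on synthetic populations. The injected subnetwork effect, threshold,
# and permutation count are assumptions for illustration only.
import numpy as np
import bct

rng = np.random.default_rng(42)
n, p, q = 20, 12, 12

def random_population(n, subjects, rng, bump=0.0):
    mats = rng.normal(size=(n, n, subjects))
    mats = (mats + mats.transpose(1, 0, 2)) / 2   # enforce symmetry
    mats[:5, :5, :] += bump                       # strengthen a small subnetwork
    return mats

x = random_population(n, p, rng, bump=1.0)
y = random_population(n, q, rng)

pvals, adj, null = bct.nbs_bct(x, y, thresh=2.0, k=100)
print(pvals)  # one corrected p-value per suprathreshold component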
def nbs_bct(x, y, thresh, k=1000, tail='both'):
    '''
    PVAL = NBS(X,Y,THRESH)

    Performs the NBS for populations X and Y for a t-statistic threshold of
    alpha.

    inputs: x,y,    matrices representing the two populations being compared.
                    x and y are of size NxNxP, where N is the number of nodes
                    in the network and P is the number of subjects within the
                    population. P need not be equal for both X and Y.
                    X[i,j,k] stores the connectivity value corresponding to
                    the edge between i and j for the kth member of the
                    population. x and y must be symmetric.
            thresh, the minimum t-value used as threshold
            k,      the number of permutations to be generated to estimate
                    the empirical null distribution (default 1000)
            tail,   enables specification of the type of alternative
                    hypothesis to test.
                    'left':  mean of population X < mean of population Y
                    'right': mean of population Y < mean of population X
                    'both':  means are unequal (default)

    outputs: pval,  a vector of corrected p-values for each component of the
                    network that is identified. If at least one p-value is
                    less than alpha, then the omnibus null hypothesis can be
                    rejected at alpha significance. The null hypothesis is
                    that the value of connectivity at each edge comes from
                    distributions of equal mean between the two populations.
             adj,   an adjacency matrix identifying the edges comprising each
                    component. Edges are assigned indexed values.
             null,  a vector of k samples from the null distribution of
                    maximal component size

    ALGORITHM DESCRIPTION
    The NBS is a nonparametric statistical test used to isolate the
    components of an N x N undirected connectivity matrix that differ
    significantly between two distinct populations. Each element of the
    connectivity matrix stores a connectivity value and each member of the
    two populations possesses a distinct connectivity matrix. A component of
    a connectivity matrix is defined as a set of interconnected edges.

    The NBS is essentially a procedure to control the family-wise error rate,
    in the weak sense, when the null hypothesis is tested independently at
    each of the N(N-1)/2 edges comprising the undirected connectivity matrix.
    The NBS can provide greater statistical power than conventional
    procedures for controlling the family-wise error rate, such as the false
    discovery rate, if the set of edges at which the null hypothesis is
    rejected constitutes a large component or components.

    The NBS comprises four steps:
    1. Perform a two-sample T-test at each edge independently to test the
       hypothesis that the values of connectivity between the two populations
       come from distributions with equal means.
    2. Threshold the T-statistic available at each edge to form a set of
       suprathreshold edges.
    3. Identify any components in the adjacency matrix defined by the set of
       suprathreshold edges. These are referred to as observed components.
       Compute the size of each observed component identified; that is, the
       number of edges it comprises.
    4. Repeat K times steps 1-3, each time randomly permuting members of the
       two populations and storing the size of the largest component
       identified for each permutation. This yields an empirical estimate of
       the null distribution of maximal component size. A corrected p-value
       for each observed component is then calculated using this null
       distribution.

    [1] Zalesky A, Fornito A, Bullmore ET (2010) Network-based statistic:
        Identifying differences in brain networks. NeuroImage.
        10.1016/j.neuroimage.2010.06.041

    DEPENDENCIES
    Please note that nbs_bct depends on networkx
    '''

    def ttest2_stat_only(x, y, tail):
        t = np.mean(x) - np.mean(y)
        n1, n2 = len(x), len(y)
        # pooled standard deviation from unbiased sample variances
        s = np.sqrt(((n1 - 1) * np.var(x, ddof=1)
                     + (n2 - 1) * np.var(y, ddof=1)) / (n1 + n2 - 2))
        denom = s * np.sqrt(1 / n1 + 1 / n2)
        if denom == 0:
            return 0
        if tail == 'both':
            return np.abs(t / denom)
        if tail == 'left':
            return -t / denom
        else:
            return t / denom

    if tail not in ('both', 'left', 'right'):
        raise BCTParamError('Tail must be both, left, right')

    ix, jx, nx = x.shape
    iy, jy, ny = y.shape

    if not ix == jx == iy == jy:
        raise BCTParamError('Population matrices are of inconsistent size')
    else:
        n = ix

    # only consider upper triangular edges
    ixes = np.where(np.triu(np.ones((n, n)), 1))

    # number of edges
    m = np.size(ixes, axis=1)

    # vectorize connectivity matrices for speed
    xmat, ymat = np.zeros((m, nx)), np.zeros((m, ny))

    for i in range(nx):
        xmat[:, i] = x[:, :, i][ixes].squeeze()
    for i in range(ny):
        ymat[:, i] = y[:, :, i][ixes].squeeze()
    del x, y

    # perform t-test at each edge
    t_stat = np.zeros((m,))
    for i in range(m):
        t_stat[i] = ttest2_stat_only(xmat[i, :], ymat[i, :], tail)

    # threshold
    ind_t, = np.where(t_stat > thresh)

    # suprathreshold adjacency matrix; a tuple of index arrays sets the
    # element-wise (row, col) pairs (see the note after this snippet)
    adj = np.zeros((n, n))
    adj[(ixes[0][ind_t], ixes[1][ind_t])] = 1
    adj = adj + adj.T

    a, sz = get_components(adj)

    # convert size from nodes to number of edges; only consider components
    # comprising more than one node (i.e. at least one edge)
    ind_sz, = np.where(sz > 1)
    ind_sz += 1
    nr_components = np.size(ind_sz)
    sz_links = np.zeros((nr_components,))
    for i in range(nr_components):
        nodes, = np.where(ind_sz[i] == a)
        sz_links[i] = np.sum(adj[np.ix_(nodes, nodes)]) / 2
        adj[np.ix_(nodes, nodes)] *= (i + 2)

    # subtract 1 to delete any edges not comprising a component
    adj[np.where(adj)] -= 1

    if np.size(sz_links):
        max_sz = np.max(sz_links)
    else:
        max_sz = 0
    print('max component size is %i' % max_sz)

    # estimate the empirical null distribution of maximum component size by
    # generating k independent permutations
    print('estimating null distribution with %i permutations' % k)

    null = np.zeros((k,))
    hit = 0
    for u in range(k):
        # randomize
        d = np.hstack((xmat, ymat))[:, np.random.permutation(nx + ny)]

        t_stat_perm = np.zeros((m,))
        for i in range(m):
            t_stat_perm[i] = ttest2_stat_only(d[i, :nx], d[i, -ny:], tail)

        ind_t, = np.where(t_stat_perm > thresh)

        adj_perm = np.zeros((n, n))
        adj_perm[(ixes[0][ind_t], ixes[1][ind_t])] = 1
        adj_perm = adj_perm + adj_perm.T

        a, sz = get_components(adj_perm)

        ind_sz, = np.where(sz > 1)
        ind_sz += 1
        nr_components_perm = np.size(ind_sz)
        sz_links_perm = np.zeros((nr_components_perm))
        for i in range(nr_components_perm):
            nodes, = np.where(ind_sz[i] == a)
            sz_links_perm[i] = np.sum(adj_perm[np.ix_(nodes, nodes)]) / 2

        if np.size(sz_links_perm):
            null[u] = np.max(sz_links_perm)
        else:
            null[u] = 0

        # compare to the true dataset
        if null[u] >= max_sz:
            hit += 1
        # print('permutation %i of %i. Permutation max is %s. Observed max '
        #       'is %s. P-val estimate is %.3f'
        #       % (u, k, null[u], max_sz, hit / (u + 1)))
        print('permutation %i of %i. p-value so far is %.3f'
              % (u, k, hit / (u + 1)))

    pvals = np.zeros((nr_components,))
    # calculate p-vals
    for i in range(nr_components):
        pvals[i] = np.size(np.where(null >= sz_links[i])) / k

    return pvals, adj, null
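# Note on the indexing used when building the suprathreshold adjacency
# matrix above: a tuple of index arrays selects element-wise (row, col)
# pairs, whereas np.ix_ builds an open mesh, i.e. the full cross product of
# the index vectors. NBS needs the element-wise form so that exactly the
# suprathreshold edges are set. A small self-contained demonstration:
import numpy as np

rows = np.array([0, 1])
cols = np.array([2, 3])

a = np.zeros((4, 4))
a[(rows, cols)] = 1        # sets (0, 2) and (1, 3) only
b = np.zeros((4, 4))
b[np.ix_(rows, cols)] = 1  # sets (0, 2), (0, 3), (1, 2), (1, 3)
print(int(a.sum()), int(b.sum()))  # 2 4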
                # Fragment: the enclosing loops and try block are elided in
                # this excerpt.
                skewness = 1
                while abs(skewness) > 0.3:
                    w = bct.threshold_proportional(corrmat, kappa)
                    skewness = skew(bct.degrees_und(w))
                    kappa += 0.01
                df.at[(subject, sessions[i], task, conds[j], mask),
                      "k_scale-free"] = kappa

                # reset kappa starting point, then calculate the proportion
                # of connections that must be retained for node connectedness
                kappa = 0.01
                num = 2
                while num > 1:
                    w = bct.threshold_proportional(corrmat, kappa)
                    comp, num = bct.get_components(w)
                    num = np.unique(comp).shape[0]
                    kappa += 0.01
                df.at[(subject, sessions[i], task, conds[j], mask),
                      "k_connected"] = kappa
            else:
                pass
                # df.at[(subject, sessions[i], task, conds[j], mask),
                #       'k_connected'] = kappa
        except Exception as e:
            print(subject, "didn't run, because", e)

    df.to_csv(
        join(data_dir,
             "phys-learn-fci_kappa_{0}-{1}.csv".format(
                 connectivity_metric, str(datetime.datetime.now()))))
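# Self-contained sketch of the connectedness search in the fragment above:
# find the smallest proportional threshold kappa at which the thresholded
# graph is still a single connected component. The helper name, step size,
# and test matrix are assumptions for illustration.
import numpy as np
import bct

def smallest_connected_kappa(corrmat, step=0.01):
    kappa = step
    num = 2
    while num > 1 and kappa <= 1.0:
        w = bct.threshold_proportional(corrmat, kappa)
        comp, _ = bct.get_components(w)
        num = np.unique(comp).shape[0]  # number of distinct components
        kappa += step
    return kappa - step                 # last kappa actually tested

rng = np.random.default_rng(0)
corrmat = rng.random((30, 30))
corrmat = (corrmat + corrmat.T) / 2     # get_components needs symmetry
print(smallest_connected_kappa(corrmat))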
def create_feature_matrix(structure_matrix_file):
    # Feature matrix with each element containing an NxN array
    feature_matrix = []

    # EDGE WEIGHT (Depth 0)
    # weighted & undirected network
    structural_connectivity_array = np.array(
        pd.DataFrame(loadmat(structure_matrix_file)['connectivity']))
    feature_matrix.append(structural_connectivity_array)

    # DEGREE (Depth 1 & 2)
    # Node degree is the number of links connected to the node.
    deg = bct.degrees_und(structural_connectivity_array)
    fill_array_2D(feature_matrix, deg)

    # *** Conversion of connection weights to connection lengths ***
    connection_length_matrix = bct.weight_conversion(
        structural_connectivity_array, 'lengths')

    # SHORTEST PATH LENGTH (Depth 3 & 4)
    '''
    The distance matrix contains lengths of shortest paths between all
    pairs of nodes. An entry (u,v) represents the length of the shortest
    path from node u to node v. The average shortest path length is the
    characteristic path length of the network.
    '''
    shortest_path = bct.distance_wei(connection_length_matrix)
    # distance (shortest weighted path) matrix
    feature_matrix.append(shortest_path[0])
    # matrix of the number of edges in each shortest weighted path
    feature_matrix.append(shortest_path[1])

    # BETWEENNESS CENTRALITY (Depth 5 & 6)
    '''
    Node betweenness centrality is the fraction of all shortest paths in
    the network that contain a given node. Nodes with high values of
    betweenness centrality participate in a large number of shortest paths.
    '''
    bc = bct.betweenness_wei(connection_length_matrix)
    fill_array_2D(feature_matrix, bc)

    # CLUSTERING COEFFICIENTS (Depth 7 & 8)
    '''
    The weighted clustering coefficient is the average "intensity" of
    triangles around a node.
    '''
    cl = bct.clustering_coef_wu(connection_length_matrix)
    fill_array_2D(feature_matrix, cl)

    # Find disconnected nodes - nodes outside the dominant component
    W_bin = bct.weight_conversion(structural_connectivity_array, 'binarize')
    comps, comp_sizes = bct.get_components(W_bin)
    print('comp: ', comps)
    print('sizes: ', comp_sizes)
    # keep only the rows/columns of nodes assigned to the most common
    # component label
    keep = np.where(comps == statistics.mode(comps))[0]
    new_array = structural_connectivity_array[np.ix_(keep, keep)]

    return feature_matrix
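# Toy illustration of the weight-to-length conversion and shortest-path step
# used above (the matrix is an assumption, not data from the original
# pipeline). Strong weights become short lengths, which distance_wei then
# treats as costs.
import numpy as np
import bct

w = np.array([[0.0, 0.9, 0.0],
              [0.9, 0.0, 0.2],
              [0.0, 0.2, 0.0]])

lengths = bct.weight_conversion(w, 'lengths')  # elementwise 1/w on edges
dist, num_edges = bct.distance_wei(lengths)
print(dist)       # shortest weighted path length between all node pairs
print(num_edges)  # number of edges on each shortest path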
def nbs_bct(x, y, thresh, k=1000, tail='both', paired=False, verbose=False):
    '''
    PVAL = NBS(X,Y,THRESH)

    Performs the NBS for populations X and Y for a t-statistic threshold of
    alpha.

    inputs: x,y,    matrices representing the two populations being
                    compared. x and y are of size NxNxP, where N is the
                    number of nodes in the network and P is the number of
                    subjects within the population. P need not be equal for
                    both X and Y. X[i,j,k] stores the connectivity value
                    corresponding to the edge between i and j for the kth
                    member of the population. x and y must be symmetric.
            thresh, the minimum t-value used as threshold
            k,      the number of permutations to be generated to estimate
                    the empirical null distribution (default 1000)
            tail,   enables specification of the type of alternative
                    hypothesis to test.
                    'left':  mean of population X < mean of population Y
                    'right': mean of population Y < mean of population X
                    'both':  means are unequal (default)
            paired, use paired sample t-test instead of population t-test.
                    defaults to False.
            verbose, print extra information. default False

    outputs: pval,  a vector of corrected p-values for each component of the
                    network that is identified. If at least one p-value is
                    less than alpha, then the omnibus null hypothesis can be
                    rejected at alpha significance. The null hypothesis is
                    that the value of connectivity at each edge comes from
                    distributions of equal mean between the two populations.
             adj,   an adjacency matrix identifying the edges comprising each
                    component. Edges are assigned indexed values.
             null,  a vector of k samples from the null distribution of
                    maximal component size

    ALGORITHM DESCRIPTION
    The NBS is a nonparametric statistical test used to isolate the
    components of an N x N undirected connectivity matrix that differ
    significantly between two distinct populations. Each element of the
    connectivity matrix stores a connectivity value and each member of the
    two populations possesses a distinct connectivity matrix. A component of
    a connectivity matrix is defined as a set of interconnected edges.

    The NBS is essentially a procedure to control the family-wise error rate,
    in the weak sense, when the null hypothesis is tested independently at
    each of the N(N-1)/2 edges comprising the undirected connectivity matrix.
    The NBS can provide greater statistical power than conventional
    procedures for controlling the family-wise error rate, such as the false
    discovery rate, if the set of edges at which the null hypothesis is
    rejected constitutes a large component or components.

    The NBS comprises four steps:
    1. Perform a two-sample T-test at each edge independently to test the
       hypothesis that the values of connectivity between the two populations
       come from distributions with equal means.
    2. Threshold the T-statistic available at each edge to form a set of
       suprathreshold edges.
    3. Identify any components in the adjacency matrix defined by the set of
       suprathreshold edges. These are referred to as observed components.
       Compute the size of each observed component identified; that is, the
       number of edges it comprises.
    4. Repeat K times steps 1-3, each time randomly permuting members of the
       two populations and storing the size of the largest component
       identified for each permutation. This yields an empirical estimate of
       the null distribution of maximal component size. A corrected p-value
       for each observed component is then calculated using this null
       distribution.

    [1] Zalesky A, Fornito A, Bullmore ET (2010) Network-based statistic:
        Identifying differences in brain networks. NeuroImage.
        10.1016/j.neuroimage.2010.06.041
    '''

    def ttest2_stat_only(x, y, tail):
        t = np.mean(x) - np.mean(y)
        n1, n2 = len(x), len(y)
        # pooled standard deviation from unbiased sample variances
        s = np.sqrt(((n1 - 1) * np.var(x, ddof=1)
                     + (n2 - 1) * np.var(y, ddof=1)) / (n1 + n2 - 2))
        denom = s * np.sqrt(1 / n1 + 1 / n2)
        if denom == 0:
            return 0
        if tail == 'both':
            return np.abs(t / denom)
        if tail == 'left':
            return -t / denom
        else:
            return t / denom

    def ttest_paired_stat_only(A, B, tail):
        n = len(A - B)
        df = n - 1
        sample_ss = np.sum((A - B)**2) - np.sum(A - B)**2 / n
        unbiased_std = np.sqrt(sample_ss / (n - 1))
        z = np.mean(A - B) / unbiased_std
        t = z * np.sqrt(n)
        if tail == 'both':
            return np.abs(t)
        if tail == 'left':
            return -t
        else:
            return t

    if tail not in ('both', 'left', 'right'):
        raise BCTParamError('Tail must be both, left, right')

    ix, jx, nx = x.shape
    iy, jy, ny = y.shape

    if not ix == jx == iy == jy:
        raise BCTParamError('Population matrices are of inconsistent size')
    else:
        n = ix

    if paired and nx != ny:
        raise BCTParamError('Population matrices must be an equal size')

    # only consider upper triangular edges
    ixes = np.where(np.triu(np.ones((n, n)), 1))

    # number of edges
    m = np.size(ixes, axis=1)

    # vectorize connectivity matrices for speed
    xmat, ymat = np.zeros((m, nx)), np.zeros((m, ny))

    for i in range(nx):
        xmat[:, i] = x[:, :, i][ixes].squeeze()
    for i in range(ny):
        ymat[:, i] = y[:, :, i][ixes].squeeze()
    del x, y

    # perform t-test at each edge
    t_stat = np.zeros((m,))
    for i in range(m):
        if paired:
            t_stat[i] = ttest_paired_stat_only(xmat[i, :], ymat[i, :], tail)
        else:
            t_stat[i] = ttest2_stat_only(xmat[i, :], ymat[i, :], tail)

    # threshold
    ind_t, = np.where(t_stat > thresh)

    # suprathreshold adjacency matrix
    adj = np.zeros((n, n))
    adj[(ixes[0][ind_t], ixes[1][ind_t])] = 1
    adj = adj + adj.T

    a, sz = get_components(adj)

    # convert size from nodes to number of edges; only consider components
    # comprising more than one node (i.e. at least one edge)
    ind_sz, = np.where(sz > 1)
    ind_sz += 1
    nr_components = np.size(ind_sz)
    sz_links = np.zeros((nr_components,))
    for i in range(nr_components):
        nodes, = np.where(ind_sz[i] == a)
        sz_links[i] = np.sum(adj[np.ix_(nodes, nodes)]) / 2
        adj[np.ix_(nodes, nodes)] *= (i + 2)

    # subtract 1 to delete any edges not comprising a component
    adj[np.where(adj)] -= 1

    if np.size(sz_links):
        max_sz = np.max(sz_links)
    else:
        raise BCTParamError('True matrix is degenerate')
    print('max component size is %i' % max_sz)

    # estimate the empirical null distribution of maximum component size by
    # generating k independent permutations
    print('estimating null distribution with %i permutations' % k)

    null = np.zeros((k,))
    hit = 0
    for u in range(k):
        # randomize
        if paired:
            # randomly flip the sign of each subject's paired difference
            indperm = np.sign(0.5 - np.random.rand(1, nx))
            d = np.hstack((xmat, ymat)) * np.hstack((indperm, indperm))
        else:
            d = np.hstack((xmat, ymat))[:, np.random.permutation(nx + ny)]

        t_stat_perm = np.zeros((m,))
        for i in range(m):
            if paired:
                t_stat_perm[i] = ttest_paired_stat_only(
                    d[i, :nx], d[i, -nx:], tail)
            else:
                t_stat_perm[i] = ttest2_stat_only(d[i, :nx], d[i, -ny:], tail)

        ind_t, = np.where(t_stat_perm > thresh)

        adj_perm = np.zeros((n, n))
        adj_perm[(ixes[0][ind_t], ixes[1][ind_t])] = 1
        adj_perm = adj_perm + adj_perm.T

        a, sz = get_components(adj_perm)

        ind_sz, = np.where(sz > 1)
        ind_sz += 1
        nr_components_perm = np.size(ind_sz)
        sz_links_perm = np.zeros((nr_components_perm))
        for i in range(nr_components_perm):
            nodes, = np.where(ind_sz[i] == a)
            sz_links_perm[i] = np.sum(adj_perm[np.ix_(nodes, nodes)]) / 2

        if np.size(sz_links_perm):
            null[u] = np.max(sz_links_perm)
        else:
            null[u] = 0

        # compare to the true dataset
        if null[u] >= max_sz:
            hit += 1

        if verbose:
            print('permutation %i of %i. Permutation max is %s. Observed max'
                  ' is %s. P-val estimate is %.3f'
                  % (u, k, null[u], max_sz, hit / (u + 1)))
        elif (u % (k // 10) == 0 or u == k - 1):
            print('permutation %i of %i. p-value so far is %.3f'
                  % (u, k, hit / (u + 1)))

    pvals = np.zeros((nr_components,))
    # calculate p-vals
    for i in range(nr_components):
        pvals[i] = np.size(np.where(null >= sz_links[i])) / k

    return pvals, adj, null
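# Hedged sketch of the paired variant (bct.nbs_bct in bctpy). The synthetic
# pre/post data, threshold, and permutation count are assumptions for
# illustration; with no true effect injected, the p-values should be large.
import numpy as np
import bct

rng = np.random.default_rng(1)
n, subjects = 15, 10

pre = rng.normal(size=(n, n, subjects))
pre = (pre + pre.transpose(1, 0, 2)) / 2       # enforce symmetry
post = pre + 0.5 * rng.normal(size=(n, n, subjects))
post = (post + post.transpose(1, 0, 2)) / 2    # correlated follow-up scans

pvals, adj, null = bct.nbs_bct(pre, post, thresh=2.0, k=100, paired=True)
print(pvals)      # corrected p-value per component
print(null[:10])  # first few draws from the permutation null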