Beispiel #1
0
def build_igraph(sub, task, membership=known_membership):
	"""Build and save one thresholded graph per correlation matrix of a task.

	Every file matching ``/home/despoB/arjun/<sub>-<task>*`` is loaded as a
	matrix, thresholded with ``brain_graphs.matrix_to_igraph`` (cost .1),
	wrapped in a ``VertexClustering`` using ``membership``, and written with
	``np.savez`` under ``graphs/``. The text after the last '-' in the matrix
	path is used as the condition suffix of the output name. The correlation
	matrices are expected to exist already; if none match, a message is
	printed and nothing is written.
	"""
	matrix_paths = glob.glob('/home/despoB/arjun/{s}-{t}*'.format(s=sub, t=task))
	if not matrix_paths:
		print('No correlation matrices found for {s}-{t}'.format(s=sub, t=task))
		return
	for path in matrix_paths:
		condition = path.split('-')[-1]
		thresholded = brain_graphs.matrix_to_igraph(np.load(path), cost=.1)
		clustering = VertexClustering(thresholded, membership=membership)
		partition = brain_graphs.brain_graph(clustering)
		target = os.path.join('graphs', '{s}-{t}-{cs}'.format(s=sub, t=task, cs=condition))
		print(target)
		np.savez(target, graph=thresholded, partition=partition)
def analyze_results(q_ratio=.75):
	"""Plot the generative-model rich-club curves and the model vs. real PC histograms.

	Loads the saved rich-club arrays and model graphs for ``q_ratio`` (1000
	iterations, 100 nodes, 'all' shortest paths), draws the four normalized
	rich-club series with a t-score overlay on a twin axis, saves and shows
	that figure, then shows histograms of the model and real participation
	coefficients (PC).
	"""
	real_degree, real_pc = get_real_degree(0.05)
	real_degree = np.array(real_degree)
	real_pc = np.array(real_pc)
	real_degree = real_degree[real_degree > 0]
	real_pc = real_pc[real_pc >= 0]

	n_nodes = 100
	iters = 1000
	all_shortest = 'all'
	percent = .95
	run_tag = (iters, n_nodes, all_shortest, q_ratio)
	deg_both = np.load('/home/despoB/mb3152/dynamic_mod/results/new_gen_results/rich_club_gen_deg_both_%s_%s_%s_%s.npy' % run_tag)
	pc_both = np.load('/home/despoB/mb3152/dynamic_mod/results/new_gen_results/rich_club_gen_pc_both_%s_%s_%s_%s.npy' % run_tag)
	none_pc = np.load('/home/despoB/mb3152/dynamic_mod/results/new_gen_results/rich_club_gen_pc_none_%s_%s_%s_%s.npy' % run_tag)
	none_deg = np.load('/home/despoB/mb3152/dynamic_mod/results/new_gen_results/rich_club_gen_deg_none_%s_%s_%s_%s.npy' % run_tag)
	pc_graphs = np.load('/home/despoB/mb3152/dynamic_mod/results/new_gen_results/rich_club_gen_graphs_both_%s_%s_%s_%s' % run_tag)

	# Mark the model graphs that have at least ten nodes with a positive PC;
	# only those rows of pc_both are drawn in the PC_Q series below.
	idx = np.zeros(shape=len(pc_graphs)).astype(bool)
	pcs = []
	for position, model_graph in enumerate(pc_graphs):
		clustering = model_graph.community_fastgreedy().as_clustering()
		node_pc = np.array(brain_graphs.brain_graph(clustering).pc)
		if np.count_nonzero(node_pc > 0) >= 10:
			idx[position] = True
		pcs.append(node_pc)

	# Rich-club curves, truncated at the top `percent` of ranks.
	cutoff = int(n_nodes*percent)
	sns.set_style("white")
	sns.set_style("ticks")
	ax1 = sns.tsplot(pc_both[idx, :cutoff], color='black', condition='PC_Q', ci=95)
	sns.tsplot(deg_both[:, :cutoff], color='yellow', condition='Deg_Q', ci=95)
	sns.tsplot(none_pc[:, :cutoff], color='red', condition='PC_None', ci=95)
	sns.tsplot(none_deg[:, :cutoff], color='blue', condition='Deg_None', ci=95)
	sns.plt.legend(loc='upper left')
	sns.plt.ylabel('Normalized Rich Club Coefficeint')
	sns.plt.xlabel('Rank')

	# The t-score overlay uses every row of pc_both (not only the idx-selected ones).
	otherax = ax1.twinx()
	t_scores = scipy.stats.ttest_ind(pc_both[:, :cutoff], none_pc[:, :cutoff])[0]
	otherax.plot(t_scores, color='green', label='T Score')
	sns.plt.legend()
	sns.plt.xlim(0, cutoff)
	sns.plt.savefig('/home/despoB/mb3152/dynamic_mod/figures/%s_%s_%s_%s_generative_new.pdf' % (n_nodes, iters, all_shortest, q_ratio), dpi=1000)
	sns.plt.show()

	# Model vs. real PC distributions.
	bins = 10
	sns.plt.hist(np.array(pcs).reshape(-1), histtype='stepfilled', normed=True, alpha=0.35, color='yellow', label='Model', stacked=True, bins=bins)
	sns.plt.hist(real_pc, histtype='stepfilled', normed=True, alpha=0.35, color='blue', label='Real', stacked=True, bins=bins)
	sns.plt.show()
def get_real_degree(density=.05):
	"""Return node strengths and participation coefficients of the group-average graph.

	The group matrix is read from a cached ``.npy`` file. When the cache cannot
	be read it is rebuilt: every matrix matching the glob pattern below has its
	diagonal and NaNs zeroed, is passed through ``np.arctanh``, has non-finite
	results replaced by NaN, and the stack is averaged with ``np.nanmean``; the
	result is then written back to the cache path.

	Args:
		density: threshold passed to ``brain_graphs.matrix_to_igraph``.

	Returns:
		``(strengths, pc)`` where ``strengths`` is ``graph.strength(weights='weight')``
		and ``pc`` is an array of the ``pc`` attribute of the fast-greedy
		``brain_graphs.brain_graph`` partition.

	Raises:
		IOError: the cache is unreadable and no matrices match the glob pattern
			(previously an all-NaN average was computed and cached instead).
	"""
	cache_path = '/home/despoB/mb3152/dynamic_mod/graph_for_gen_compare.npy'
	try:
		matrix = np.load(cache_path)
	except (IOError, ValueError):
		# Missing or corrupt cache only; anything else (e.g. KeyboardInterrupt)
		# must propagate instead of silently triggering a rebuild.
		matrix = None
	if matrix is None:
		matrix_paths = glob.glob('/home/despoB/mb3152/dynamic_mod/matrices/**power*rfMRI_REST*matrix*')
		if not matrix_paths:
			raise IOError('No matrices found to rebuild %s' % cache_path)
		stack = None
		for k, path in enumerate(matrix_paths):
			m = np.load(path)
			np.fill_diagonal(m, 0.0)
			m[np.isnan(m)] = 0
			m = np.arctanh(m)
			# arctanh(+-1) is infinite; exclude those entries from the mean.
			m[~np.isfinite(m)] = np.nan
			if stack is None:
				# Size the float64 stack from the data rather than assuming 264 nodes.
				stack = np.zeros(m.shape + (len(matrix_paths),))
			stack[:, :, k] = m
		matrix = np.nanmean(stack, axis=2)
		np.save(cache_path, matrix)
	graph = brain_graphs.matrix_to_igraph(matrix, density, binary=False, check_tri=True, interpolation='midpoint', normalize=False)
	vc = graph.community_fastgreedy().as_clustering()
	pc = np.array(brain_graphs.brain_graph(vc).pc)
	return graph.strength(weights='weight'), pc
def get_real_data(density=.2):
	"""Return per-node [between, within] connection sums plus PC and WMD of the mean graph.

	All matrices matching the resting-state glob are accumulated pairwise with
	``np.nansum`` and divided by the file count. The mean matrix is thresholded
	at ``density``, partitioned with weighted infomap, and for each node the
	matrix row is summed (and rounded up) separately over nodes in its own
	community and over all other nodes. Nodes whose two sums add to zero are
	left out of the list.

	Returns:
		``(gammas, pc, wmd)`` where ``gammas`` is a list of ``[between, within]``
		pairs and ``pc`` / ``wmd`` are attributes of the ``brain_graphs.brain_graph``
		built from the infomap partition.
	"""
	matrix_paths = glob.glob('/home/despoB/mb3152/dynamic_mod/matrices/*rfMRI_REST*matrix*')
	matrix = np.load(matrix_paths[0])
	for path in matrix_paths[1:]:
		matrix = np.nansum([matrix, np.load(path)], axis=0)
	matrix = matrix/len(matrix_paths)

	thresholded = brain_graphs.matrix_to_igraph(matrix, density, binary=False, check_tri=True, interpolation='midpoint', normalize=False)
	communities = thresholded.community_infomap(edge_weights='weight')
	membership = np.array(communities.membership)

	gammas = []
	for node in range(matrix.shape[0]):
		same_community = membership == membership[node]
		inside = np.argwhere(same_community)
		outside = np.argwhere(~same_community)
		within = np.ceil(np.sum(matrix[node, inside]))
		between = np.ceil(np.sum(matrix[node, outside]))
		if within + between != 0.0:
			gammas.append([between, within])

	wrapped = brain_graphs.brain_graph(communities)
	return gammas, wrapped.pc, wrapped.wmd
def small_rich_clubs():
	"""Compute normalized rich-club coefficients on Watts-Strogatz graphs.

	Ten 1000-node graphs are generated with rewiring probabilities
	0.0, 0.1, ..., 0.9. For each graph the empirical rich-club curve (``RC(...).phis()``)
	is divided by the mean curve of 25 ``preserve_strength`` randomizations, once
	with participation coefficient as the node score and once with strength.

	Returns:
		``(rcs, d_rcs, mods)``: per-graph slices (ranks 80%-90%) of the
		PC-normalized curve, the same slices of the strength-normalized curve,
		and the modularity of each graph's fast-greedy partition. The original
		code computed these lists but dropped them, returning None.
	"""
	n_nodes = 1000
	density = .10
	n_random = 25
	neighbors = int(np.around((density/2.)*n_nodes))
	rcs = []
	d_rcs = []
	mods = []
	for step in range(10):
		rewire_p = (step * 10) * 0.01
		graph = Graph.Watts_Strogatz(1, n_nodes, neighbors, rewire_p)
		graph.es["weight"] = np.ones(graph.ecount())
		vc = brain_graphs.brain_graph(graph.community_fastgreedy().as_clustering())
		pc = vc.pc
		pc[np.isnan(pc)] = 0.0
		low = int(graph.vcount()*.8)
		high = int(graph.vcount()*.9)

		# Participation-coefficient rich club, normalized by randomized graphs.
		# `_` keeps the comprehension from clobbering the outer loop variable,
		# which Python 2 list comprehensions would otherwise do.
		pc_emperical_phis = RC(graph, scores=pc).phis()
		pc_average_randomized_phis = np.nanmean([RC(preserve_strength(graph, randomize_topology=True), scores=pc).phis() for _ in range(n_random)], axis=0)
		pc_normalized_phis = pc_emperical_phis/pc_average_randomized_phis
		rcs.append(pc_normalized_phis[low:high])
		mods.append(vc.community.modularity)

		# Strength (degree) rich club, normalized the same way.
		degree_emperical_phis = RC(graph, scores=graph.strength(weights='weight')).phis()
		average_randomized_phis = np.nanmean([RC(preserve_strength(graph, randomize_topology=True), scores=graph.strength(weights='weight')).phis() for _ in range(n_random)], axis=0)
		degree_normalized_phis = degree_emperical_phis/average_randomized_phis
		d_rcs.append(degree_normalized_phis[low:high])
	return rcs, d_rcs, mods
def known_graphs():
	"""Compare PC-based and strength-based rich clubs on connected Barabasi graphs.

	For each of 100 iterations a connected 1000-node Barabasi graph is drawn,
	both rich-club curves are normalized by the mean of five
	``preserve_strength`` randomizations, and the mean of each normalized curve
	over ranks 75%-90% is recorded. After every iteration the running
	independent-samples t-test between the two lists is printed.
	"""
	iters = 100
	n_random = 5
	pc_rc = []
	deg_rc = []
	for _ in range(iters):
		# Redraw until the generated graph is connected.
		connected = False
		while not connected:
			graph = Graph.Barabasi(1000, 3, implementation="psumtree")
			graph.es["weight"] = np.ones(graph.ecount())
			connected = graph.is_connected() == True
		n_nodes = graph.vcount()
		low = int(n_nodes*.75)
		high = int(n_nodes*.9)

		wrapped = brain_graphs.brain_graph(graph.community_fastgreedy().as_clustering())
		pc = wrapped.pc
		pc[np.isnan(pc)] = 0.0

		pc_observed = RC(graph, scores=pc).phis()
		pc_random = np.nanmean([RC(preserve_strength(graph, randomize_topology=True), scores=pc).phis() for _r in range(n_random)], axis=0)
		pc_normalized = pc_observed/pc_random

		deg_observed = RC(graph, scores=graph.strength(weights='weight')).phis()
		deg_random = np.nanmean([RC(preserve_strength(graph, randomize_topology=True), scores=graph.strength(weights='weight')).phis() for _r in range(n_random)], axis=0)
		deg_normalized = deg_observed/deg_random

		pc_rc.append(np.nanmean(pc_normalized[low:high]))
		deg_rc.append(np.nanmean(deg_normalized[low:high]))
		print(scipy.stats.ttest_ind(pc_rc, deg_rc))
def _load_pickled_graphs(path):
	"""Unpickle a list of graphs from ``path`` (binary mode)."""
	# NOTE: pickle.load can execute arbitrary code; only use on trusted result files.
	with open(path, 'rb') as f:
		return pickle.load(f)


def _participation_coefficients(graphs):
	"""Return the fast-greedy participation coefficients of every graph in ``graphs``."""
	return [brain_graphs.brain_graph(g.community_fastgreedy().as_clustering()).pc for g in graphs]


def _plot_pc_hist_vs_real(model_pcs, real_pc, color, label, outfile, bins=10):
	"""Overlay a normalized histogram of ``model_pcs`` with the real-data PC histogram, save and show it."""
	sns.set_style('white')
	fig = sns.plt.figure()
	sns.plt.hist(np.array(model_pcs).reshape(-1), histtype='stepfilled', normed=True, alpha=0.35, color=color, label=label, stacked=True, bins=bins)
	sns.plt.legend()
	# NOTE(review): the plotted values are participation coefficients, so the
	# 'Degree' label looks like a copy/paste leftover - confirm before changing.
	sns.plt.xlabel('Degree')
	sns.plt.ylabel('Normed Count')
	# Real data goes on a twin x-axis so the two distributions keep their own scales.
	real_ax = fig.axes[0].twiny()
	real_ax.hist(real_pc, histtype='stepfilled', normed=True, alpha=0.35, color='black', label='Real Data', stacked=True, bins=bins)
	sns.plt.legend(loc=2)
	sns.plt.ylim(-0.01)
	sns.plt.savefig(outfile)
	sns.plt.show()


def plt_dd_fit():
	"""Test and plot how well model and random graphs match the real degree/PC distributions.

	Loads the pickled 'none' (random) and 'both' (model) graph lists for
	1000 iterations, 100 nodes, 'all' shortest paths and q_ratio .75. For each
	graph the ``scipy.stats.entropy`` divergence between its 10-bin histogram
	and the real-data histogram is computed, for degree and for participation
	coefficient, and the model-vs-random t-test is printed for each. Two
	histogram figures (model vs. real, random vs. real) are saved and shown.

	Changes from the earlier version: pickle files are opened in binary mode;
	the unused degree lists (one of which was appended to from the wrong loop)
	are gone; the PC fits of model and random graphs are computed independently,
	so differing list lengths no longer raise IndexError or drop graphs.
	"""
	iters = 1000
	n_nodes = 100
	q_ratio = .75
	all_shortest = 'all'
	none_graphs = _load_pickled_graphs('/home/despoB/mb3152/dynamic_mod/results/new_gen_results/rich_club_gen_graphs_none_%s_%s_%s_%s'%(iters,n_nodes,all_shortest,q_ratio))
	both_graphs = _load_pickled_graphs('/home/despoB/mb3152/dynamic_mod/results/new_gen_results/rich_club_gen_graphs_both_%s_%s_%s_%s'%(iters,n_nodes,all_shortest,q_ratio))
	none_pcs = _participation_coefficients(none_graphs)
	both_pcs = _participation_coefficients(both_graphs)

	real_degree, real_pc = get_real_degree(0.05)
	real_degree = np.array(real_degree)
	real_pc = np.array(real_pc)
	real_degree = real_degree[real_degree > 0]
	real_pc = real_pc[real_pc > 0]

	# Degree-distribution fit: divergence of each graph's histogram from the real one.
	real_degree_hist = np.histogram(real_degree, 10)[0]
	model_fits = [scipy.stats.entropy(np.histogram(g.degree(), 10)[0], real_degree_hist) for g in both_graphs]
	random_fits = [scipy.stats.entropy(np.histogram(g.degree(), 10)[0], real_degree_hist) for g in none_graphs]
	print('degree %s' % (scipy.stats.ttest_ind(model_fits, random_fits),))

	# Participation-coefficient fit, same procedure.
	real_pc_hist = np.histogram(real_pc, 10)[0]
	model_fits = [scipy.stats.entropy(np.histogram(p, 10)[0], real_pc_hist) for p in both_pcs]
	random_fits = [scipy.stats.entropy(np.histogram(p, 10)[0], real_pc_hist) for p in none_pcs]
	print('pc %s' % (scipy.stats.ttest_ind(model_fits, random_fits),))

	_plot_pc_hist_vs_real(both_pcs, real_pc, 'blue', 'Model', '/home/despoB/mb3152/dynamic_mod/figures/pc_compare_model_real_%s_%s_%s_%s.pdf'%(n_nodes,iters,all_shortest,q_ratio))
	_plot_pc_hist_vs_real(none_pcs, real_pc, 'yellow', 'Random', '/home/despoB/mb3152/dynamic_mod/figures/pc_compare_random_real_%s_%s_%s_%s.pdf'%(n_nodes,iters,all_shortest,q_ratio))
def analyze_param_results():
	"""Summarize and plot generative-model metrics across the Q/shortest-path ratio sweep.

	For every q_ratio in 0.50..1.00 (step 0.01) the saved model graphs and the
	PC rich-club array are loaded, and six per-graph values are collected:
	summed shortest paths, summed between-community shortest paths, modularity,
	degree-distribution fit, PC-distribution fit (both via scipy.stats.entropy
	against the real data from get_real_degree) and a rich-club value. The
	long-format frame is plotted once as a 6-panel grid and once z-scored per
	variable on a single axis; both figures are saved and shown.
	"""
	n_nodes = 100
	iters = 100
	all_shortest = 'all'
	real_degree,real_pc = get_real_degree(0.05)
	real_degree = np.array(real_degree)
	real_degree = real_degree[real_degree>0]
	real_pc = np.array(real_pc)
	real_pc[np.isnan(real_pc)] = 0.0
	df = pd.DataFrame(columns = ['Model','Variable','Q_SP Ratio','Value'])
	# mean_df is filled below but not read again anywhere in this function.
	mean_df = pd.DataFrame(columns = ['Model','Variable','Q_SP Ratio','Value'])
	for q_ratio in np.arange(50,101)*0.01:
		pc_graphs = np.load('/home/despoB/mb3152/dynamic_mod/results/new_gen_results/rich_club_gen_graphs_both_%s_%s_%s_%s'%(iters,n_nodes,all_shortest,q_ratio))
		pc_both = np.load('/home/despoB/mb3152/dynamic_mod/results/new_gen_results/rich_club_gen_pc_both_%s_%s_%s_%s.npy'%(iters,n_nodes,all_shortest,q_ratio))
		# Per-graph metric lists for this q_ratio.
		sp = []
		bcsp = []
		mod = []
		dd_fit = []
		pc = []
		pc_rc = []
		for i,g in enumerate(pc_graphs):
			vc = g.community_fastgreedy().as_clustering()
			v = brain_graphs.brain_graph(vc)
			p = np.array(v.pc)
			community_matrix = brain_graphs.community_matrix(vc.membership,0)
			theshort = np.array(g.shortest_paths())
			sp.append(np.sum(theshort))
			# Only entries where community_matrix != 1 are summed
			# (presumably node pairs in different communities - verify against brain_graphs).
			bcsp.append(np.sum(theshort[community_matrix!=1]))
			mod.append(vc.modularity)
			pc.append(scipy.stats.entropy(np.histogram(p,10)[0],np.histogram(np.array(real_pc),10)[0]))
			# Mean of columns 85..89 of this graph's row in pc_both.
			pc_rc.append(np.nanmean(pc_both[i,int(n_nodes*.85):int(n_nodes*.9)]))
			dd_fit.append(scipy.stats.entropy(np.histogram(g.degree(),10)[0],np.histogram(np.array(real_degree),10)[0]))
		# Path sums and fit divergences are negated so that larger plotted values mean shorter paths / closer fit.
		mean_df = mean_df.append({'Variable':'Efficiency','Q_SP Ratio':q_ratio,'Value':np.nanmean(sp)*-1},ignore_index=True)
		mean_df = mean_df.append({'Variable':'Between Community Efficiency','Q_SP Ratio':q_ratio,'Value':np.nanmean(bcsp)*-1},ignore_index=True)
		mean_df = mean_df.append({'Variable':'Q','Q_SP Ratio':q_ratio,'Value':np.nanmean(mod)},ignore_index=True)
		mean_df = mean_df.append({'Variable':'Degree Distribution Fit','Q_SP Ratio':q_ratio,'Value':np.nanmean(dd_fit)*-1},ignore_index=True)
		mean_df = mean_df.append({'Variable':'PC Fit','Q_SP Ratio':q_ratio,'Value':np.nanmean(pc)*-1},ignore_index=True)
		mean_df = mean_df.append({'Variable':'RCC','Q_SP Ratio':q_ratio,'Value':np.nanmean(pc_rc)},ignore_index=True)
		# zip stops at 100 entries, matching iters; each model contributes six long-format rows.
		for s,i,j,k,l,m,n in zip(range(0,100),sp,bcsp,mod,dd_fit,pc,pc_rc):
			df = df.append({'Model':s,'Variable':'Efficiency','Q_SP Ratio':q_ratio,'Value':i*-1},ignore_index=True)
			df = df.append({'Model':s,'Variable':'Between Community Efficiency','Q_SP Ratio':q_ratio,'Value':j*-1},ignore_index=True)
			df = df.append({'Model':s,'Variable':'Q','Q_SP Ratio':q_ratio,'Value':k},ignore_index=True)
			df = df.append({'Model':s,'Variable':'Degree Distribution Fit','Q_SP Ratio':q_ratio,'Value':l*-1},ignore_index=True)
			df = df.append({'Model':s,'Variable':'PC Fit','Q_SP Ratio':q_ratio,'Value':m*-1},ignore_index=True)
			df = df.append({'Model':s,'Variable':'RCC','Q_SP Ratio':q_ratio,'Value':n},ignore_index=True)
	# NOTE(review): this is chained indexing - df['Value'][mask] yields a new Series, so the
	# assignment of NaN to non-finite RCC values most likely never reaches df. Confirm, and
	# note that actually dropping rows would break the fixed (100,51) fill below.
	df['Value'][df.Variable=='RCC'][np.isfinite(df['Value'][df.Variable=='RCC'])==False] = np.nan
	df = df.dropna()
	params = ['Efficiency','Between Community Efficiency','Q','Degree Distribution Fit','PC Fit', 'RCC']
	g = sns.FacetGrid(df,col='Variable',sharex=False, sharey=False,col_wrap=3)
	for param,ax,c in zip(params,g.axes.reshape(-1),sns.color_palette()):
		# d is models x q_ratios; each column assignment needs exactly 100 values for that q_ratio.
		d = np.zeros((100,51))
		temp_df = df[df.Variable==param].copy()
		for i, q_ratio in enumerate(np.arange(50,101)*0.01):
			d[:,i] = temp_df.Value[temp_df['Q_SP Ratio']==q_ratio].values
		sns.tsplot(d,np.arange(50,101)*0.01,ax=ax,color =c,ci=95)
		ax.set_title(param)
	sns.plt.savefig('/home/despoB/mb3152/dynamic_mod/figures/multi_parameter_figure_new.pdf')
	sns.plt.show()
	sns.plt.close()

	# ax.figure.set_size_inches(*args, **kwargs)
	# normalize: returns a copy of df with val_name z-scored separately within each distinct col_name value.
	# NOTE(review): the write inside it also relies on chained assignment - verify it takes effect with the pandas version in use.
	def normalize(df,col_name,val_name):
	    norm_df = df.copy()
	    for feature_name in np.unique(df['%s'%(col_name)]):
	    	norm_df[val_name][norm_df[col_name]==feature_name] = scipy.stats.zscore(df[val_name][df[col_name]==feature_name])
	    return norm_df
	norm_df = normalize(df,'Variable','Value')
	# All six z-scored variables on one axis, one line per variable.
	sns.tsplot(data=norm_df,time='Q_SP Ratio',unit='Model',condition='Variable',value='Value')
	sns.plt.savefig('/home/despoB/mb3152/dynamic_mod/figures/parameter_figure_new.pdf')
	sns.plt.show()
def preferential_routing_multi_density(variables):
	"""Iteratively rewire a graph and return its normalized rich-club curves.

	``variables`` is an indexable sequence:
		[0] metric: 'q' (score-driven rewiring) or 'none' (random baseline)
		[1] n_nodes: upper bound for the random node indices drawn when adding edges
		[2] density: read but not used in this function
		[3] graph: the graph to rewire; it is modified in place
		[4] seed passed to np.random.seed
		[5] all_shortest: 'all' or 'bc', selects which shortest-path sum is scored
		[6] q_ratio: weight of the modularity score; (1 - q_ratio) weights the path score

	Runs 150 rounds. In each round a set of edges is chosen, each is removed
	(unless that disconnects the graph) and replaced by a random new edge, and
	the PC- and strength-based rich-club curves are recomputed and normalized by
	25 preserve_strength randomizations.

	Returns:
		[metric, pc_normalized_phis, degree_normalized_phis, graph] from the last round.
	"""
	metric = variables[0]
	n_nodes = variables[1]
	density = variables[2]
	graph = variables[3]
	np.random.seed(variables[4])
	all_shortest = variables[5]
	print variables[4],variables[0]
	q_ratio = variables[6]
	# rccs collects the finite mean of the last ten normalized PC phis per round; it is not returned.
	rccs = []
	for idx in range(150):
		delete_edges = graph.get_edgelist()
		if metric != 'none':
			# Baseline modularity and shortest-path sums before any edge is touched this round.
			vc = graph.community_fastgreedy().as_clustering()
			orig_q = vc.modularity
			membership = vc.membership
			orig_sps = np.sum(np.array(graph.shortest_paths()))
			community_matrix = brain_graphs.community_matrix(membership,0)
			np.fill_diagonal(community_matrix,1)
			orig_bc_sps = np.sum(np.array(graph.shortest_paths())[community_matrix!=1])
			q_edge_scores = []
			sps_edge_scores = []
			# Score every edge by temporarily deleting it and re-adding it afterwards.
			for edge in delete_edges:
				eid = graph.get_eid(edge[0],edge[1],error=False)
				graph.delete_edges(eid)
				# Modularity is re-evaluated with the membership fixed at the round's baseline partition.
				q_edge_scores.append(VertexClustering(graph,membership).modularity-orig_q)
				if all_shortest == 'all':
					sps_edge_scores.append(orig_sps-np.sum(np.array(graph.shortest_paths())))
				if all_shortest == 'bc':
					sps_edge_scores.append(orig_bc_sps-np.sum(np.array(graph.shortest_paths())[community_matrix!=1]))
				graph.add_edge(edge[0],edge[1],weight=1)
			q_edge_scores = np.array(q_edge_scores)#Q when edge removed - original Q. High means increase in Q when edge removed.
			sps_edge_scores = np.array(sps_edge_scores)#original sps minus sps when edge removed. Higher value means more efficient.
			# Combine z-scored ranks of both scores; if the path scores carry no information
			# (at most one distinct value, including the empty case), rank on modularity alone.
			if len(np.unique(sps_edge_scores)) > 1:
				q_edge_scores = scipy.stats.zscore(scipy.stats.rankdata(q_edge_scores,method='min'))
				sps_edge_scores = scipy.stats.zscore(scipy.stats.rankdata(sps_edge_scores,method='min'))
				scores = (q_edge_scores*q_ratio) + (sps_edge_scores*(1-q_ratio))
			else:
				scores = scipy.stats.rankdata(q_edge_scores,method='min')
		# NOTE(review): any metric other than 'q' or 'none' leaves `edges` unassigned and the
		# loop below raises NameError in the first round.
		if metric == 'q':
			# Top 5% of edges by combined score, highest score first.
			edges = np.array(delete_edges)[np.argsort(scores)][int(-(graph.ecount()*.05)):]
			edges = np.array(list(edges)[::-1])
		if metric == 'none':
			# NOTE(review): scores has only int(ecount*.05) entries, so argsort indexes just the
			# first 5% of the edge list (in shuffled order) rather than sampling all edges - confirm intended.
			scores = np.random.randint(0,100,(int(graph.ecount()*.05))).astype(float)
			edges = np.array(delete_edges)[np.argsort(scores)]
		for edge in edges:
			eid = graph.get_eid(edge[0],edge[1],error=False)
			graph.delete_edges(eid)
			# Keep the graph connected: restore the edge and move on if removal split it.
			if graph.is_connected() == False:
				graph.add_edge(edge[0],edge[1],weight=1)
				continue
			# Replace the removed edge with a random edge between two distinct, currently unconnected nodes.
			while True:
				i = np.random.randint(0,n_nodes)
				j = np.random.randint(0,n_nodes)
				if i == j:
					continue
				if graph.get_eid(i,j,error=False) == -1:
					graph.add_edge(i,j,weight=1)
					break
		sys.stdout.flush()
		# Recompute both rich-club curves on the rewired graph, each normalized by 25 randomizations.
		vc = brain_graphs.brain_graph(graph.community_fastgreedy().as_clustering())
		pc = vc.pc
		pc[np.isnan(pc)] = 0.0
		pc_emperical_phis = RC(graph,scores=pc).phis()
		pc_average_randomized_phis = np.nanmean([RC(preserve_strength(graph,randomize_topology=True),scores=pc).phis() for i in range(25)],axis=0)
		pc_normalized_phis = pc_emperical_phis/pc_average_randomized_phis
		degree_emperical_phis = RC(graph, scores=graph.strength(weights='weight')).phis()
		average_randomized_phis = np.nanmean([RC(preserve_strength(graph,randomize_topology=True),scores=graph.strength(weights='weight')).phis() for i in range(25)],axis=0)
		degree_normalized_phis = degree_emperical_phis/average_randomized_phis
		rcc = pc_normalized_phis[-10:]
		if np.isfinite(np.nanmean(rcc)):
			rccs.append(np.nanmean(rcc))	
	return [metric,pc_normalized_phis,degree_normalized_phis,graph]