Example #1
0
def make_directed_json_graph_soc(gmt_fn, d_id_name, d_id_category, d_category_color, outfn=None):
	"""Build a root -> category -> term tree and optionally write it as JSON.

	(Original author's note: "make directed graph based on SOC - PT".)

	Terms are loaded from `gmt_fn` with `read_gmt`; terms with fewer than
	5 genes are discarded. Every remaining term becomes a leaf hanging
	under its category node, and every category hangs under one 'root'
	node. Each leaf carries three attributes:

	* 'size'  - its degree in the undirected graph built from the Jaccard
	            matrix after entries not above 0.2 have been zeroed,
	* 'label' - its name from `d_id_name`,
	* 'color' - the color of its category from `d_category_color`.

	Args:
		gmt_fn: passed unchanged to `read_gmt`.
		d_id_name: mapping term id -> display label.
		d_id_category: mapping term id -> category.
		d_category_color: mapping category -> color.
		outfn: path of the JSON file to write. When None (the default)
			nothing is written.

	Returns:
		The nested structure produced by `json_graph.tree_data`, i.e. the
		same data that is written to `outfn`.

	Raises:
		KeyError: (with plain dict arguments) when a kept term is missing
			from `d_id_name` / `d_id_category`, or its category is missing
			from `d_category_color`.
	"""
	# Keep only the terms that have at least 5 genes.
	d_gmt = {term: genes for term, genes in read_gmt(gmt_fn).items() if len(genes) >= 5}

	# A single pre-formatted argument makes print(...) emit the same text
	# whether it is parsed as a Python 2 statement or a Python 3 call.
	print('%s %s' % ('number of terms:', len(d_gmt)))

	# list(...) so the ids stay indexable/ordered on Python 3 as well.
	# NOTE(review): assumes jaccard_matrix orders its rows like d_gmt's key
	# iteration order - confirm against jaccard_matrix.
	umls_ids_kept = list(d_gmt.keys())

	adj_matrix = jaccard_matrix(d_gmt)
	# Multiply by the 0/1 mask so that entries not above 0.2 become 0.
	# NOTE(review): this is elementwise only if adj_matrix is an ndarray
	# (not np.matrix) - confirm.
	m = adj_matrix > 0.2
	adj_matrix = adj_matrix * m.astype(int)
	Gu = nx.from_numpy_matrix(adj_matrix) # undirected Graph, to get size

	G = nx.DiGraph()
	for i, umls_id in enumerate(umls_ids_kept):
		name = d_id_name[umls_id]
		category = d_id_category[umls_id]
		color = d_category_color[category]

		# Re-adding the root -> category edge for later terms is a no-op.
		G.add_edge('root', category)
		G.add_edge(category, umls_id)

		G.node[umls_id]['size'] = Gu.degree(i)
		G.node[umls_id]['label'] = name
		G.node[umls_id]['color'] = color
	print('%s %s' % (G.number_of_nodes(), G.number_of_edges()))

	graph_data = json_graph.tree_data(G, root='root')
	if outfn is not None:
		# Text mode: json.dump writes str, which a 'wb' handle rejects on
		# Python 3; the context manager guarantees the file is closed.
		with open(outfn, 'w') as out:
			json.dump(graph_data, out)
	return graph_data
Example #2
0
def network_layout(gmt_fn, outfn=None):
	"""Build the thresholded term-similarity graph and optionally save it as GML.

	The GML file is meant to be opened in Gephi, which computes the layout.

	Terms are loaded from `gmt_fn` with `read_gmt`; terms with fewer than
	5 genes are discarded. The Jaccard matrix of the remaining terms has
	every entry not above 0.2 zeroed and is then turned into a graph with
	`nx.from_numpy_matrix`. Node `i` gets two attributes: 'size' (its
	degree in that graph) and 'id' (the i-th kept term id).

	Args:
		gmt_fn: passed unchanged to `read_gmt`.
		outfn: path of the GML file to write; nothing is written when None.

	Returns:
		The graph created by `nx.from_numpy_matrix`, with the node
		attributes described above.
	"""
	# Drop every term that has fewer than 5 genes.
	kept_terms = {term: genes for term, genes in read_gmt(gmt_fn).items() if len(genes) >= 5}

	print('%s %s' % ('number of terms:', len(kept_terms)))

	# NOTE(review): assumes jaccard_matrix orders its rows like the dict's
	# key iteration order - confirm against jaccard_matrix.
	umls_ids_kept = list(kept_terms)
	similarity = jaccard_matrix(kept_terms)

	# Multiply by the 0/1 mask so that entries not above 0.2 become 0.
	above_cutoff = similarity > 0.2
	similarity = similarity * above_cutoff.astype(int)

	G = nx.from_numpy_matrix(similarity)

	print('%s %s %s' % ('G: ', G.number_of_edges(), G.number_of_nodes()))

	# Node size is the degree in the thresholded graph (earlier variants,
	# since disabled, used the matrix column sums or the gene-set size).
	n_rows = similarity.shape[0]
	for idx in range(n_rows):
		attrs = G.node[idx]
		attrs['size'] = G.degree(idx)
		attrs['id'] = umls_ids_kept[idx]

	if outfn is None:
		return G
	nx.write_gml(G, outfn)
	return G
Example #3
0
def make_directed_json_graph(gmt_fn, d_id_name, d_id_category, d_category_color, outfn=None):
	"""Cluster terms hierarchically, build a root -> cluster -> term tree,
	and optionally write it as JSON (for a pack visualization).

	Terms are loaded from `gmt_fn` with `read_gmt`; terms with fewer than
	5 genes are discarded. `AgglomerativeClustering(n_clusters=10)` is fit
	on the full (unthresholded) Jaccard matrix. Every term becomes a leaf
	under the node of its cluster label, and every cluster node hangs under
	one 'root' node. Each leaf carries three attributes:

	* 'size'  - its degree in the undirected graph built from the Jaccard
	            matrix after entries not above 0.2 have been zeroed,
	* 'label' - its name from `d_id_name`,
	* 'color' - the color of its category from `d_category_color`.

	Args:
		gmt_fn: passed unchanged to `read_gmt`.
		d_id_name: mapping term id -> display label.
		d_id_category: mapping term id -> category.
		d_category_color: mapping category -> color.
		outfn: path of the JSON file to write. When None (the default)
			nothing is written.

	Returns:
		The nested structure produced by `json_graph.tree_data`, i.e. the
		same data that is written to `outfn`.

	Raises:
		KeyError: (with plain dict arguments) when a kept term is missing
			from `d_id_name` / `d_id_category`, or its category is missing
			from `d_category_color`.
	"""
	# Keep only the terms that have at least 5 genes.
	d_gmt = {term: genes for term, genes in read_gmt(gmt_fn).items() if len(genes) >= 5}

	# A single pre-formatted argument makes print(...) emit the same text
	# whether it is parsed as a Python 2 statement or a Python 3 call.
	print('%s %s' % ('number of terms:', len(d_gmt)))

	# list(...) so the ids stay indexable/ordered on Python 3 as well.
	# NOTE(review): assumes jaccard_matrix orders its rows like d_gmt's key
	# iteration order - confirm against jaccard_matrix.
	umls_ids_kept = list(d_gmt.keys())
	adj_matrix = jaccard_matrix(d_gmt)

	# Clustering is fit before thresholding, i.e. on the full matrix.
	hc = AgglomerativeClustering(n_clusters=10)
	hc.fit(adj_matrix)

	# Multiply by the 0/1 mask so that entries not above 0.2 become 0.
	# NOTE(review): this is elementwise only if adj_matrix is an ndarray
	# (not np.matrix) - confirm.
	m = adj_matrix > 0.2
	adj_matrix = adj_matrix * m.astype(int)
	Gu = nx.from_numpy_matrix(adj_matrix) # undirected Graph, to get size

	G = nx.DiGraph()
	# Debug aid: the row count and the number of ids are expected to match.
	print('%s %s' % (adj_matrix.shape, len(umls_ids_kept)))
	for i in range(adj_matrix.shape[0]):
		# labels_ holds NumPy integers, which json cannot serialize on every
		# platform/Python version; a plain int is an equal node key.
		cluster_label = int(hc.labels_[i])
		umls_id = umls_ids_kept[i]
		name = d_id_name[umls_id]

		# Re-adding the root -> cluster edge for later terms is a no-op.
		G.add_edge('root', cluster_label)
		G.add_edge(cluster_label, umls_id)
		G.node[umls_id]['size'] = Gu.degree(i)
		G.node[umls_id]['label'] = name

		category = d_id_category[umls_id]
		color = d_category_color[category]
		G.node[umls_id]['color'] = color
	print('%s %s' % (G.number_of_nodes(), G.number_of_edges()))

	graph_data = json_graph.tree_data(G, root='root')
	if outfn is not None:
		# Text mode: json.dump writes str, which a 'wb' handle rejects on
		# Python 3; the context manager guarantees the file is closed.
		with open(outfn, 'w') as out:
			json.dump(graph_data, out)
	return graph_data