Ejemplo n.º 1
0
def network_layout(gmt_fn, outfn=None):
	## make a Graph object and write to gml for Gephi to 
	## do the layout

	d_gmt = read_gmt(gmt_fn)
	d_gmt_filt = {}
	for term, genes in d_gmt.items():
		if len(genes) >= 5:
			d_gmt_filt[term] = genes
	d_gmt = d_gmt_filt

	print 'number of terms:', len(d_gmt)
	umls_ids_kept = d_gmt.keys()
	adj_matrix = jaccard_matrix(d_gmt)

	m = adj_matrix > 0.2
	# degrees = adj_matrix.sum(axis=0)
	adj_matrix = adj_matrix * m.astype(int)
	
	G = nx.from_numpy_matrix(adj_matrix)

	print 'G: ',G.number_of_edges(), G.number_of_nodes()

	for i in range(adj_matrix.shape[0]):
		# G.node[i]['size'] = degrees[i]
		# G.node[i]['size'] = len(d_gmt[umls_ids_kept[i]])
		G.node[i]['size'] = G.degree(i)
		G.node[i]['id'] = umls_ids_kept[i]

	if outfn is not None:	
		nx.write_gml(G, outfn)
	return G
Ejemplo n.º 2
0
def make_directed_json_graph_soc(gmt_fn, d_id_name, d_id_category, d_category_color, outfn=None):
	# make directed graph based on SOC - PT
	d_gmt = read_gmt(gmt_fn)
	d_gmt_filt = {}
	for term, genes in d_gmt.items():
		if len(genes) >= 5:
			d_gmt_filt[term] = genes
	d_gmt = d_gmt_filt

	print 'number of terms:', len(d_gmt)
	umls_ids_kept = d_gmt.keys()
	adj_matrix = jaccard_matrix(d_gmt)
	m = adj_matrix > 0.2
	adj_matrix = adj_matrix * m.astype(int)
	Gu = nx.from_numpy_matrix(adj_matrix) # undirected Graph, to get size
	G = nx.DiGraph()
	for i in range(len(umls_ids_kept)):
		umls_id = umls_ids_kept[i]
		name = d_id_name[umls_id]
		category = d_id_category[umls_id]
		color = d_category_color[category]

		G.add_edge('root', category)
		G.add_edge(category, umls_id)

		G.node[umls_id]['size'] = Gu.degree(i)
		G.node[umls_id]['label'] = name
		G.node[umls_id]['color'] = color
	print G.number_of_nodes(), G.number_of_edges()		
	graph_data = json_graph.tree_data(G,root='root')
	json.dump(graph_data, open(outfn, 'wb'))
	return
Ejemplo n.º 3
0
def network_layout(gmt_fn, outfn=None):
    ## make a Graph object and write to gml for Gephi to
    ## do the layout

    d_gmt = read_gmt(gmt_fn)
    d_gmt_filt = {}
    for term, genes in d_gmt.items():
        if len(genes) >= 5:
            d_gmt_filt[term] = genes
    d_gmt = d_gmt_filt

    print 'number of terms:', len(d_gmt)
    umls_ids_kept = d_gmt.keys()
    adj_matrix = jaccard_matrix(d_gmt)

    m = adj_matrix > 0.2
    # degrees = adj_matrix.sum(axis=0)
    adj_matrix = adj_matrix * m.astype(int)

    G = nx.from_numpy_matrix(adj_matrix)

    print 'G: ', G.number_of_edges(), G.number_of_nodes()

    for i in range(adj_matrix.shape[0]):
        # G.node[i]['size'] = degrees[i]
        # G.node[i]['size'] = len(d_gmt[umls_ids_kept[i]])
        G.node[i]['size'] = G.degree(i)
        G.node[i]['id'] = umls_ids_kept[i]

    if outfn is not None:
        nx.write_gml(G, outfn)
    return G
Ejemplo n.º 4
0
def make_directed_json_graph(gmt_fn,
                             d_id_name,
                             d_id_category,
                             d_category_color,
                             outfn=None):
    # perform HC and make a directed graph and write to json
    # for pack visualization
    d_gmt = read_gmt(gmt_fn)
    d_gmt_filt = {}
    for term, genes in d_gmt.items():
        if len(genes) >= 5:
            d_gmt_filt[term] = genes
    d_gmt = d_gmt_filt

    print 'number of terms:', len(d_gmt)
    umls_ids_kept = d_gmt.keys()
    adj_matrix = jaccard_matrix(d_gmt)

    hc = AgglomerativeClustering(n_clusters=10)
    hc.fit(adj_matrix)

    m = adj_matrix > 0.2
    adj_matrix = adj_matrix * m.astype(int)
    Gu = nx.from_numpy_matrix(adj_matrix)  # undirected Graph, to get size

    G = nx.DiGraph()
    print adj_matrix.shape, len(umls_ids_kept)
    for i in range(adj_matrix.shape[0]):
        cluster_label = hc.labels_[i]
        umls_id = umls_ids_kept[i]
        name = d_id_name[umls_id]
        G.add_edge('root', cluster_label)
        G.add_edge(cluster_label, umls_id)
        G.node[umls_id]['size'] = Gu.degree(i)
        G.node[umls_id]['label'] = name

        category = d_id_category[umls_id]
        color = d_category_color[category]
        G.node[umls_id]['color'] = color
    print G.number_of_nodes(), G.number_of_edges()
    graph_data = json_graph.tree_data(G, root='root')
    json.dump(graph_data, open(outfn, 'wb'))
    return
Ejemplo n.º 5
0
def make_directed_json_graph(gmt_fn, d_id_name, d_id_category, d_category_color, outfn=None):
	# perform HC and make a directed graph and write to json
	# for pack visualization
	d_gmt = read_gmt(gmt_fn)
	d_gmt_filt = {}
	for term, genes in d_gmt.items():
		if len(genes) >= 5:
			d_gmt_filt[term] = genes
	d_gmt = d_gmt_filt

	print 'number of terms:', len(d_gmt)
	umls_ids_kept = d_gmt.keys()
	adj_matrix = jaccard_matrix(d_gmt)

	hc = AgglomerativeClustering(n_clusters=10)
	hc.fit(adj_matrix)

	m = adj_matrix > 0.2
	adj_matrix = adj_matrix * m.astype(int)
	Gu = nx.from_numpy_matrix(adj_matrix) # undirected Graph, to get size

	G = nx.DiGraph()
	print adj_matrix.shape, len(umls_ids_kept)
	for i in range(adj_matrix.shape[0]):
		cluster_label = hc.labels_[i]
		umls_id = umls_ids_kept[i]
		name = d_id_name[umls_id]
		G.add_edge('root', cluster_label)
		G.add_edge(cluster_label, umls_id)
		G.node[umls_id]['size'] = Gu.degree(i)
		G.node[umls_id]['label'] = name

		category = d_id_category[umls_id]
		color = d_category_color[category]
		G.node[umls_id]['color'] = color
	print G.number_of_nodes(), G.number_of_edges()	
	graph_data = json_graph.tree_data(G,root='root')
	json.dump(graph_data, open(outfn, 'wb'))
	return
Ejemplo n.º 6
0
def make_directed_json_graph_soc(gmt_fn,
                                 d_id_name,
                                 d_id_category,
                                 d_category_color,
                                 outfn=None):
    # make directed graph based on SOC - PT
    d_gmt = read_gmt(gmt_fn)
    d_gmt_filt = {}
    for term, genes in d_gmt.items():
        if len(genes) >= 5:
            d_gmt_filt[term] = genes
    d_gmt = d_gmt_filt

    print 'number of terms:', len(d_gmt)
    umls_ids_kept = d_gmt.keys()
    adj_matrix = jaccard_matrix(d_gmt)
    m = adj_matrix > 0.2
    adj_matrix = adj_matrix * m.astype(int)
    Gu = nx.from_numpy_matrix(adj_matrix)  # undirected Graph, to get size
    G = nx.DiGraph()
    for i in range(len(umls_ids_kept)):
        umls_id = umls_ids_kept[i]
        name = d_id_name[umls_id]
        category = d_id_category[umls_id]
        color = d_category_color[category]

        G.add_edge('root', category)
        G.add_edge(category, umls_id)

        G.node[umls_id]['size'] = Gu.degree(i)
        G.node[umls_id]['label'] = name
        G.node[umls_id]['color'] = color
    print G.number_of_nodes(), G.number_of_edges()
    graph_data = json_graph.tree_data(G, root='root')
    json.dump(graph_data, open(outfn, 'wb'))
    return