Ejemplo n.º 1
0
def make_enrichment_clustergram(enr, dist_type):
	"""Build the d3 clustergram JSON for a collection of enrichment results.

	enr: iterable of mappings; each entry is read for its 'name' and 'color'
	keys here and the whole collection is also handed to d3_clustergram.
	dist_type: forwarded unchanged to d3_clustergram.cluster_row_and_column
	(presumably a distance-metric name - confirm against that module).

	Returns the value produced by d3_clustergram.d3_clust_single_value.
	"""
	import d3_clustergram as d3c

	# lookup table: enrichment term name -> display color 
	color_by_term = {entry['name']: entry['color'] for entry in enr}

	# turn the enrichment results into node lists plus a data matrix 
	nodes, data_mat = d3c.convert_enr_to_nodes_mat(enr)

	# work out the row/column ordering from the clustering step 
	clust_order = d3c.cluster_row_and_column(nodes, data_mat, dist_type, enr)

	# assemble and hand back the json used by the d3 visualization 
	return d3c.d3_clust_single_value(nodes, clust_order, data_mat, color_by_term)
Ejemplo n.º 2
0
def generate_d3_json():
	"""Generate and save one clustergram JSON per gene class.

	Loads 'andrew_data/cumul_probs.json' (read for its 'nodes' and 'data_mat'
	entries) together with the gene-class and resource-class JSON files. For
	every class in the gene-class file, the rows of data_mat whose row name
	belongs to that class are selected, clustered with the 'cosine' setting,
	converted to a d3 JSON structure, annotated, and written to
	'static/networks/<class>_cumul_probs.json'.

	Annotations added to the d3 JSON:
	  - each column node gets 'data_group', taken from the resource-class
	    file with spaces replaced by underscores;
	  - each link gets 'info', set to 1 when its target column is named
	    'Grants_Per_Gene' and 0 otherwise.

	Returns None; results are written to disk.
	Raises KeyError if a column name is missing from the resource-class file.
	"""
	import json_scripts
	import d3_clustergram
	import numpy as np 

	print('loading json in generate_d3_json')
	# load saved json of andrew data 
	data_json = json_scripts.load_to_dict('andrew_data/cumul_probs.json')

	# get nodes and data_mat 
	nodes = data_json['nodes']
	data_mat = np.asarray(data_json['data_mat'])

	print(nodes['col'])
	print(data_mat.shape)

	print('calculating clustering orders')

	# gene and resource classes 
	################################# 
	# gene class 
	gc = json_scripts.load_to_dict('gene_classes_harmonogram.json')
	# resource class 
	# NOTE(review): 'harminogram' is spelled differently from the gene-class
	# file name above - confirm it matches the file on disk 
	rc = json_scripts.load_to_dict('resource_classes_harminogram.json')

	# loop through classes
	for inst_class in gc:

		print(inst_class + '\n')

		# build the membership set once per class so each row check is O(1) 
		class_genes = set(gc[inst_class])

		# indices of the rows (in their original order) that belong to this class 
		row_idx = [i for i, inst_gs in enumerate(nodes['row']) if inst_gs in class_genes]

		# class_nodes for export: all columns, only the rows of this class 
		class_nodes = {
			'col': nodes['col'],
			'row': [nodes['row'][i] for i in row_idx],
		}

		# class_mat is the subset of data_mat that only has genes of one class, e.g. kinases
		# one integer-array index keeps the result 2-D (n_rows, n_cols) even for
		# zero or one matching row, and avoids repeated vstack copies;
		# the explicit int dtype keeps an empty index list valid 
		class_mat = data_mat[np.asarray(row_idx, dtype=int), :]

		# actual clustering 
		########################
		# cluster the matrix, return clust_order
		clust_order = d3_clustergram.cluster_row_and_column( class_nodes, class_mat, 'cosine' )

		print('generating d3 json')

		# generate d3_clust json: return json 
		d3_json = d3_clustergram.d3_clust_single_value(class_nodes, clust_order, class_mat )

		# add extra information (data_group) to d3_json - add resource class to d3_json['col_nodes']
		###############################################################################################
		for inst_col in d3_json['col_nodes']:

			# the column name is the resource name used as key in rc 
			inst_res = inst_col['name']

			# add the resource-class - data_group
			inst_col['data_group'] = rc[ inst_res ]['data_group'].replace(' ','_')

		# add extra link information about grant: this will be used to color the grant links externally 
		# from the d3_clustergram code 
		for inst_link in d3_json['links']:

			# link['target'] indexes into col_nodes 
			target_name = d3_json['col_nodes'][inst_link['target']]['name']
			inst_link['info'] = 1 if target_name == 'Grants_Per_Gene' else 0

		print('saving to disk')

		# save visualization json 
		json_scripts.save_to_json(d3_json,'static/networks/'+inst_class+'_cumul_probs.json','no_indent')