# Example no. 1
# 0
def make_enrichment_clustergram(enr, dist_type):
	"""Build the d3 clustergram JSON for a collection of enrichment terms.

	Args:
		enr: iterable of enrichment terms; each item is indexed with the
			keys 'name' and 'color'.
		dist_type: forwarded unchanged as the third argument of
			d3_clustergram.cluster_row_and_column.

	Returns:
		The value returned by d3_clustergram.d3_clust_single_value.
	"""
	import d3_clustergram as d3c

	# term name -> color lookup handed to the json generator
	color_by_term = {term['name']: term['color'] for term in enr}

	# turn the enrichment terms into nodes plus a data matrix
	nodes, data_mat = d3c.convert_enr_to_nodes_mat(enr)

	# row/column ordering from clustering
	ordering = d3c.cluster_row_and_column(nodes, data_mat, dist_type, enr)

	# assemble and hand back the d3 json
	return d3c.d3_clust_single_value(nodes, ordering, data_mat, color_by_term)
# Example no. 2
# 0
def generate_d3_json():
	"""Write one clustergram JSON file per gene class.

	Loads the matrix stored in 'andrew_data/cumul_probs.json' together with
	the gene-class and resource-class lookups, then for every gene class:

	  1. keeps only the rows of the data matrix whose name is in the class,
	  2. clusters that sub-matrix with the 'cosine' option,
	  3. adds a 'data_group' entry to every column node (taken from the
	     resource-class lookup, spaces replaced by underscores) and an
	     'info' flag to every link (1 when the link's target column node is
	     named 'Grants_Per_Gene', otherwise 0),
	  4. saves the result to 'static/networks/<class>_cumul_probs.json'.

	Returns:
		None. Results are written to disk and progress is printed.
	"""
	import json_scripts
	import d3_clustergram
	import numpy as np

	print('loading json in generate_d3_json')
	# load saved json of andrew data
	data_json = json_scripts.load_to_dict('andrew_data/cumul_probs.json')

	# get nodes and data_mat
	nodes = data_json['nodes']
	data_mat = np.asarray(data_json['data_mat'])

	print(nodes['col'])
	print(data_mat.shape)

	print('calculating clustering orders')

	# gene classes: class name -> collection of row names in that class
	gene_classes = json_scripts.load_to_dict('gene_classes_harmonogram.json')
	# resource classes: column name -> dict holding a 'data_group' entry
	# NOTE(review): 'harminogram' here vs 'harmonogram' above - confirm the
	# file name on disk before "fixing" the spelling.
	resource_classes = json_scripts.load_to_dict('resource_classes_harminogram.json')

	for inst_class in gene_classes:

		print(inst_class + '\n')

		class_members = gene_classes[inst_class]

		# positions of the rows that belong to this class, in original order
		row_idx = [
			i for i, inst_gs in enumerate(nodes['row'])
			if inst_gs in class_members
		]

		# nodes exported for this class: all columns, only the class rows
		class_nodes = {
			'col': nodes['col'],
			'row': [nodes['row'][i] for i in row_idx],
		}

		# class_mat is the subset of data_mat that only has rows of one class.
		# Selecting with an integer index array always gives a 2-D result,
		# including for a class with a single matching row (growing the
		# matrix row by row left that case 1-D), and copies the data once
		# instead of once per appended row.
		# NOTE(review): a class with no matching rows now yields a
		# (0, n_cols) matrix; whether the clustering call accepts that is
		# not visible from here.
		class_mat = data_mat[np.asarray(row_idx, dtype=int), :]

		# cluster the matrix, return clust_order
		clust_order = d3_clustergram.cluster_row_and_column(class_nodes, class_mat, 'cosine')

		print('generating d3 json')

		# generate d3_clust json
		d3_json = d3_clustergram.d3_clust_single_value(class_nodes, clust_order, class_mat)

		# add the resource class (data_group) to every column node
		for inst_col in d3_json['col_nodes']:
			inst_res = inst_col['name']
			inst_col['data_group'] = resource_classes[inst_res]['data_group'].replace(' ', '_')

		# flag links that point at the grants column; this is used to color
		# the grant links externally from the d3_clustergram code
		for inst_link in d3_json['links']:
			target_name = d3_json['col_nodes'][inst_link['target']]['name']
			inst_link['info'] = 1 if target_name == 'Grants_Per_Gene' else 0

		print('saving to disk')

		# save visualization json
		json_scripts.save_to_json(d3_json, 'static/networks/' + inst_class + '_cumul_probs.json', 'no_indent')
# Example no. 3
# 0
def make_ldr_clust():
    """Cluster the LDR matrix and write it as a d3 clustergram JSON file.

    Reads 'ldr_mat.json', filters the matrix and its nodes through
    d3_clustergram.filter_sim_mat, applies the same node selection to the
    'rl' -> 't' / 'f' matrices, clusters rows and columns with the 'cosine'
    option and writes 'static/networks/LDR_as_cl.json'. Sizes and sums are
    printed along the way as diagnostics.

    Returns:
        None.
    """
    import json_scripts
    import numpy as np
    import d3_clustergram

    def report(label, value):
        # one diagnostic line: label immediately followed by str(value)
        print(label + str(value))

    # load LDR data
    ldr = json_scripts.load_to_dict('ldr_mat.json')
    print(ldr.keys())

    # convert the three matrices to numpy arrays
    rl = ldr['rl']
    ldr['mat'] = np.asarray(ldr['mat'])
    for flag in ('t', 'f'):
        rl[flag] = np.asarray(rl[flag])

    report('sum all \t', np.sum(ldr['mat']))
    report('sum yes \t', np.sum(rl['t']))
    report('sum no  \t', np.sum(rl['f']))

    print(len(ldr['nodes']['as']))
    print(len(ldr['nodes']['cl']))
    print(ldr['mat'].shape)

    # unfiltered nodes: 'as' entries are the rows, 'cl' entries the columns
    nodes_uf = {'row': ldr['nodes']['as'], 'col': ldr['nodes']['cl']}

    # clustering parameters
    compare_cutoff = 0.05
    min_num_compare = 2

    # filter the main matrix, then pick the matching entries out of the
    # 't' / 'f' matrices using the filtered nodes
    ldr['mat'], nodes = d3_clustergram.filter_sim_mat(
        ldr['mat'], nodes_uf, 1, 1)
    for flag in ('t', 'f'):
        rl[flag] = d3_clustergram.cherrypick_mat_from_nodes(
            nodes_uf, nodes, rl[flag])

    report('size all \t', ldr['mat'].shape)
    report('size yes \t', rl['t'].shape)
    report('size no  \t', rl['f'].shape)
    print('\n')

    report('sum all \t', np.sum(ldr['mat']))
    report('sum yes \t', np.sum(rl['t']))
    report('sum no  \t', np.sum(rl['f']))
    report('total yes/no:\t', np.sum(rl['t']) + np.sum(rl['f']))

    # the row names are listed twice, each listing followed by a spacer
    # NOTE(review): the second pass may have been meant for nodes['col'] -
    # confirm before changing; output is kept as-is here.
    spacer = '\n\n\n'
    print(spacer)
    for _ in range(2):
        for row_name in nodes['row']:
            print(row_name)
        print(spacer)

    # cluster rows and columns
    print('calculating clustering')
    clust_order = d3_clustergram.cluster_row_and_column(
        nodes, ldr['mat'], 'cosine', compare_cutoff, min_num_compare)
    print('finished calculating clustering')

    # output location of the clustergram
    full_path = 'static/networks/' + 'LDR_as_cl.json'

    # class information (left empty)
    row_class = {}
    col_class = {}

    print(len(nodes['row']))
    print(len(nodes['col']))

    # write the clustergram
    d3_clustergram.write_json_single_value(
        nodes, clust_order, ldr, full_path, row_class, col_class)
# Example no. 4
# 0
def make_ldr_clust():
	"""Cluster the LDR matrix and save it as a d3 clustergram JSON file.

	Steps: load 'ldr_mat.json'; convert 'mat' and the 'rl' 't'/'f' entries
	to numpy arrays; filter matrix and nodes with
	d3_clustergram.filter_sim_mat; pick the matching entries out of the
	't'/'f' matrices; cluster rows and columns with the 'cosine' option;
	write 'static/networks/LDR_as_cl.json'. Diagnostic sums, sizes and node
	names are printed throughout.

	Returns:
		None.
	"""
	import json_scripts
	import numpy as np
	import d3_clustergram

	# load LDR data
	ldr = json_scripts.load_to_dict('ldr_mat.json')
	print(ldr.keys())

	rl_mats = ldr['rl']
	ldr['mat'] = np.asarray(ldr['mat'])
	rl_mats['t'] = np.asarray(rl_mats['t'])
	rl_mats['f'] = np.asarray(rl_mats['f'])

	def print_sums():
		# totals of the full matrix and of the 't' / 'f' matrices
		print('sum all \t', np.sum(ldr['mat']), sep='')
		print('sum yes \t', np.sum(rl_mats['t']), sep='')
		print('sum no  \t', np.sum(rl_mats['f']), sep='')

	print_sums()

	raw_rows = ldr['nodes']['as']
	raw_cols = ldr['nodes']['cl']
	print(len(raw_rows))
	print(len(raw_cols))
	print(ldr['mat'].shape)

	# nodes before filtering
	nodes_uf = {'row': raw_rows, 'col': raw_cols}

	# clustering parameters
	compare_cutoff = 0.05
	min_num_compare = 2

	# filter the main matrix and its nodes
	filtered_mat, nodes = d3_clustergram.filter_sim_mat(ldr['mat'], nodes_uf, 1, 1)
	ldr['mat'] = filtered_mat

	# cherrypick the 't' / 'f' matrices using the filtered nodes
	rl_mats['t'] = d3_clustergram.cherrypick_mat_from_nodes(nodes_uf, nodes, rl_mats['t'])
	rl_mats['f'] = d3_clustergram.cherrypick_mat_from_nodes(nodes_uf, nodes, rl_mats['f'])

	print('size all \t', ldr['mat'].shape, sep='')
	print('size yes \t', rl_mats['t'].shape, sep='')
	print('size no  \t', rl_mats['f'].shape, sep='')
	print('\n')

	print_sums()
	print('total yes/no:\t', np.sum(rl_mats['t']) + np.sum(rl_mats['f']), sep='')

	# row names are printed twice, with a spacer before, between and after
	# NOTE(review): the repeated listing may have been intended to show
	# nodes['col'] - confirm; the existing output is preserved here.
	gap = '\n\n\n'
	print(gap)
	for name in nodes['row']:
		print(name)
	print(gap)
	for name in nodes['row']:
		print(name)
	print(gap)

	# cluster rows and columns
	print('calculating clustering')
	clust_order = d3_clustergram.cluster_row_and_column(
		nodes, ldr['mat'], 'cosine', compare_cutoff, min_num_compare)
	print('finished calculating clustering')

	# destination of the clustergram json
	base_path = 'static/networks/'
	full_path = base_path + 'LDR_as_cl.json'

	print(len(nodes['row']))
	print(len(nodes['col']))

	# write the clustergram; row/column class information is left empty
	row_class, col_class = {}, {}
	d3_clustergram.write_json_single_value(nodes, clust_order, ldr, full_path, row_class, col_class)