def make_enrichment_clustergram(enr, dist_type):
    """Build the d3 clustergram structure for a set of enrichment terms.

    Args:
        enr: collection of enrichment-term dicts. Each entry must provide the
            'name' and 'color' keys read here; the collection is also handed
            unchanged to the d3_clustergram helpers.
        dist_type: distance type forwarded to
            d3_clustergram.cluster_row_and_column.

    Returns:
        The value produced by d3_clustergram.d3_clust_single_value.
    """
    import d3_clustergram as d3c

    # term name -> color lookup, passed along to the json builder
    color_by_term = {term['name']: term['color'] for term in enr}

    # enrichment results -> row/col nodes and the data matrix
    nodes, data_mat = d3c.convert_enr_to_nodes_mat(enr)

    # row and column ordering from clustering
    ordering = d3c.cluster_row_and_column(nodes, data_mat, dist_type, enr)

    # assemble the d3 clustergram json
    return d3c.d3_clust_single_value(nodes, ordering, data_mat, color_by_term)
def generate_d3_json():
    """Write one clustergram json per gene class.

    Loads 'andrew_data/cumul_probs.json' (keys 'nodes' and 'data_mat') plus
    the gene-class and resource-class lookup files, then for every gene class:

      1. keeps only the matrix rows whose name belongs to that class,
      2. clusters the sub-matrix with the 'cosine' distance type,
      3. tags each column node with its resource 'data_group' (spaces
         replaced by underscores) and each link with an 'info' flag that is
         1 for links targeting the 'Grants_Per_Gene' column and 0 otherwise,
      4. saves the result to 'static/networks/<class>_cumul_probs.json'.

    Returns:
        None; the output is written to disk.
    """
    import json_scripts
    import d3_clustergram
    import numpy as np

    print('loading json in generate_d3_json')

    # load saved json of andrew data
    data_json = json_scripts.load_to_dict('andrew_data/cumul_probs.json')

    # get nodes and data_mat
    nodes = data_json['nodes']
    data_mat = np.asarray(data_json['data_mat'])

    print(nodes['col'])
    print(data_mat.shape)

    print('calculating clustering orders')

    # gene classes: class name -> collection of gene symbols
    gc = json_scripts.load_to_dict('gene_classes_harmonogram.json')
    # resource classes: column (resource) name -> dict holding 'data_group'
    # NOTE(review): this file name is spelled 'harminogram' while the gene
    # class file is spelled 'harmonogram' - confirm the name on disk.
    rc = json_scripts.load_to_dict('resource_classes_harminogram.json')

    for inst_class in gc:

        print(inst_class + '\n')

        # Build the membership set once per class so each row check is O(1).
        # Assumes class members are hashable (gene-symbol strings).
        members = set(gc[inst_class])

        # indices of the rows that belong to this class, in original order
        row_idx = [
            i for i, inst_gs in enumerate(nodes['row']) if inst_gs in members
        ]

        # class_nodes for export: all columns, only the rows of this class
        class_nodes = {}
        class_nodes['col'] = nodes['col']
        class_nodes['row'] = [nodes['row'][i] for i in row_idx]

        # class_mat is the subset of data_mat that only has genes of one
        # class, e.g. kinases. A single integer-array selection replaces
        # the repeated vstack and always yields a 2-D (n_rows, n_cols)
        # matrix, including when zero or one row matches. The explicit int
        # dtype keeps an empty selection valid as an index.
        class_mat = data_mat[np.asarray(row_idx, dtype=int), :]

        # cluster the matrix, return clust_order
        clust_order = d3_clustergram.cluster_row_and_column(
            class_nodes, class_mat, 'cosine')

        print('generating d3 json')

        # generate d3_clust json
        d3_json = d3_clustergram.d3_clust_single_value(
            class_nodes, clust_order, class_mat)

        # add the resource class (data_group) to every column node
        col_nodes = d3_json['col_nodes']
        for inst_col in col_nodes:
            inst_res = inst_col['name']
            inst_col['data_group'] = rc[inst_res]['data_group'].replace(' ', '_')

        # flag grant links: this is used to color the grant links externally
        # from the d3_clustergram code
        for inst_link in d3_json['links']:
            target_name = col_nodes[inst_link['target']]['name']
            inst_link['info'] = 1 if target_name == 'Grants_Per_Gene' else 0

        print('saving to disk')

        # save visualization json
        json_scripts.save_to_json(
            d3_json,
            'static/networks/' + inst_class + '_cumul_probs.json',
            'no_indent')
def make_ldr_clust():
    """Cluster the LDR matrix and write it as a d3 clustergram json.

    This function used to be defined twice in a row with identical logic; the
    second definition shadowed the first. Only one definition is kept.

    Steps:
      1. Load 'ldr_mat.json' and convert ldr['mat'], ldr['rl']['t'] and
         ldr['rl']['f'] to numpy arrays.
      2. Use ldr['nodes']['as'] as rows and ldr['nodes']['cl'] as columns.
      3. Filter the matrix with d3_clustergram.filter_sim_mat and apply the
         same node selection to the 't' and 'f' matrices.
      4. Cluster rows and columns with the 'cosine' distance type.
      5. Write the clustergram to 'static/networks/LDR_as_cl.json'.

    Progress and summary statistics are printed along the way.

    Returns:
        None; the output is written to disk.
    """
    import json_scripts
    import numpy as np
    import d3_clustergram

    # load LDR data
    ldr = json_scripts.load_to_dict('ldr_mat.json')

    print(ldr.keys())

    ldr['mat'] = np.asarray(ldr['mat'])
    ldr['rl']['t'] = np.asarray(ldr['rl']['t'])
    ldr['rl']['f'] = np.asarray(ldr['rl']['f'])

    print('sum all \t' + str(np.sum(ldr['mat'])))
    print('sum yes \t' + str(np.sum(ldr['rl']['t'])))
    print('sum no \t' + str(np.sum(ldr['rl']['f'])))

    print(len(ldr['nodes']['as']))
    print(len(ldr['nodes']['cl']))
    print(ldr['mat'].shape)

    # define nodes: unfiltered
    nodes_uf = {}
    nodes_uf['row'] = ldr['nodes']['as']
    nodes_uf['col'] = ldr['nodes']['cl']

    # define parameters passed to the clustering call
    compare_cutoff = 0.05
    min_num_compare = 2

    # filter to remove nodes with no values
    ldr['mat'], nodes = d3_clustergram.filter_sim_mat(
        ldr['mat'], nodes_uf, 1, 1)

    # cherrypick the yes/no matrices using the filtered nodes
    ldr['rl']['t'] = d3_clustergram.cherrypick_mat_from_nodes(
        nodes_uf, nodes, ldr['rl']['t'])
    ldr['rl']['f'] = d3_clustergram.cherrypick_mat_from_nodes(
        nodes_uf, nodes, ldr['rl']['f'])

    print('size all \t' + str(ldr['mat'].shape))
    print('size yes \t' + str(ldr['rl']['t'].shape))
    print('size no \t' + str(ldr['rl']['f'].shape))
    print('\n')

    print('sum all \t' + str(np.sum(ldr['mat'])))
    print('sum yes \t' + str(np.sum(ldr['rl']['t'])))
    print('sum no \t' + str(np.sum(ldr['rl']['f'])))
    print('total yes/no:\t' + str(np.sum(ldr['rl']['t']) + np.sum(ldr['rl']['f'])))

    print('\n\n\n')

    # print out nodes
    for inst_row in nodes['row']:
        print(inst_row)

    print('\n\n\n')

    # print out nodes
    # NOTE(review): this repeats the row listing above; it may have been
    # meant to list nodes['col'] - confirm before changing the output.
    for inst_row in nodes['row']:
        print(inst_row)

    print('\n\n\n')

    # cluster rows and columns
    print('calculating clustering')
    clust_order = d3_clustergram.cluster_row_and_column(
        nodes, ldr['mat'], 'cosine', compare_cutoff, min_num_compare)
    print('finished calculating clustering')

    # output location of the d3_clustergram json
    base_path = 'static/networks/'
    full_path = base_path + 'LDR_as_cl.json'

    # class information (none is supplied for rows or columns)
    row_class = {}
    col_class = {}

    print(len(nodes['row']))
    print(len(nodes['col']))

    # write the clustergram
    d3_clustergram.write_json_single_value(
        nodes, clust_order, ldr, full_path, row_class, col_class)