def load_resource_real_names():
    '''
    Build the resource -> real-name lookup and save it as JSON.

    Reads the tab-separated file 'andrew_data/resource_mapping_names.txt'.
    Every line that splits into exactly two fields adds one entry: the first
    field is the key and the second field, with spaces replaced by
    underscores, is the value. Lines with any other number of fields are
    ignored. The mapping is written to 'resource_real_names.json'.
    '''
    import json_scripts

    print('loading resource real names')

    with open('andrew_data/resource_mapping_names.txt', 'r') as handle:
        raw_lines = handle.readlines()

    real_names = {}
    for raw in raw_lines:
        fields = raw.strip().split('\t')

        # resources without a real name (or malformed lines) are dropped
        if len(fields) != 2:
            continue

        resource, real_name = fields
        real_names[resource] = real_name.replace(' ', '_')

    json_scripts.save_to_json(real_names, 'resource_real_names.json', 'indent')
def main():
    '''
    Download the Enrichr library metadata and save the active library names.

    Makes a GET request to the Enrichr geneSetLibrary 'meta' endpoint, parses
    the response as JSON, keeps the 'libraryName' of every entry in
    'libraries' whose 'isActive' equals True, and writes
    {'names': [...]} to 'gmt_names.json'.
    '''
    import cookielib
    import poster
    import urllib2
    import json
    import json_scripts

    meta_url = 'http://amp.pharm.mssm.edu/Enrichr/geneSetLibrary?mode=meta'
    gmt_data = json.loads(urllib2.urlopen(meta_url).read())

    # local version
    # gmt_data = json_scripts.load_to_dict('enrichr_gmts.json')

    # only active libraries are kept; the equality test (rather than plain
    # truthiness) is intentional so that the selection rule is unchanged
    active_names = [
        library['libraryName']
        for library in gmt_data['libraries']
        if library['isActive'] == True
    ]

    json_scripts.save_to_json({'names': active_names}, 'gmt_names.json',
                              'noindent')
def load_resource_classes():
    '''
    Parse the resource-class table and save it as JSON.

    Reads 'andrew_data/resource_classes.txt', a tab-separated file whose first
    line is a header. Each remaining non-blank line yields one entry keyed by
    its second column (spaces replaced with underscores); columns three to
    eight are stored under 'description', 'data_type', 'data_group',
    'association', 'attribute_type' and 'attribute_group'. The result is
    written to 'resource_classes_harminogram.json'.

    Raises IndexError when a non-blank data line has fewer than eight
    tab-separated fields.
    '''
    import json_scripts

    print('loading resource classes')

    with open('andrew_data/resource_classes.txt', 'r') as handle:
        lines = handle.readlines()

    # (output key, column index) for every stored attribute
    columns = (
        ('description', 2),
        ('data_type', 3),
        ('data_group', 4),
        ('association', 5),
        ('attribute_type', 6),
        ('attribute_group', 7),
    )

    rc = {}

    # the first row only holds the column titles
    for raw in lines[1:]:

        # blank lines carry no resource and used to raise IndexError
        if not raw.strip():
            continue

        # strip only the line terminator: this keeps empty tab-separated
        # fields in place while preventing the last column from carrying a
        # trailing newline into the JSON
        fields = raw.rstrip('\r\n').split('\t')

        # I need dataset name, not resource name - no spaces
        inst_name = fields[1].replace(' ', '_')

        rc[inst_name] = dict((key, fields[col]) for key, col in columns)

    json_scripts.save_to_json(rc, 'resource_classes_harminogram.json',
                              'indent')
def load_sigs_to_json():
    '''
    Collect expression signatures from text files into one JSON file.

    Globs 'files_2-17-2017/hdf_day*.txt' and 'files_2-17-2017/Pert*.txt'.
    The signature name is the file name without '.txt' and without anything
    from '_chdir' onwards. Each non-blank line is split on commas; the first
    field is the gene and the second is its value, which is stored as the raw
    string read from the file. The nested dictionary
    {signature: {gene: value}} is written to 'proc_data/exp-pert_sigs.json'.

    Raises IndexError when a non-blank line contains no comma.
    '''
    import glob

    # imported locally like the sibling loaders, so the function does not
    # depend on a module-level import being present
    import json_scripts

    print('load')

    # normal files
    file_names = glob.glob('files_2-17-2017/hdf_day*.txt')
    pert_files = glob.glob('files_2-17-2017/Pert*.txt')
    file_names = file_names + pert_files

    print('\n\n')
    print(file_names)
    print('\n\n')

    # store all signatures in a dictionary
    exp_sigs = {}

    for inst_filename in file_names:

        inst_sig = inst_filename.split('.txt')[0].split('/')[1].split(
            '_chdir')[0]

        sig_values = {}

        # the context manager closes the file even if a line is malformed
        with open(inst_filename, 'r') as handle:
            for raw in handle:
                stripped = raw.strip()

                # tolerate blank lines (e.g. a trailing newline at the end)
                if not stripped:
                    continue

                fields = stripped.split(',')
                sig_values[fields[0]] = fields[1]

        exp_sigs[inst_sig] = sig_values

    json_scripts.save_to_json(exp_sigs, 'proc_data/exp-pert_sigs.json',
                              indent='indent')
def main():
    '''
    Merge the old (Hgram) gene lists with the 'dark' gene lists.

    Part of the work on similarity matrices for KIN, IC and GPCR genes based
    on data in the Hzome: the old gene lists are combined with the latest
    list of 'dark' genes from the KMC 2017 grant and saved to a new JSON for
    later use (calculating and visualizing the similarity matrices).

    For each of 'KIN', 'IC' and 'GPCR' the output holds two sorted,
    de-duplicated lists: 'all' (old list plus dark genes) and 'dark'.
    '''
    import json_scripts

    hgram_info = json_scripts.load_to_dict(
        '../harmonogram_classes/gene_classes_harmonogram.json')
    grant_poi = json_scripts.load_to_dict(
        '../grant_pois/proteins_of_interest.json')

    # merged gene lists, keyed by gene type
    gene_info = {}

    for gene_type in ('KIN', 'IC', 'GPCR'):

        dark_raw = grant_poi[gene_type]
        combined_raw = hgram_info[gene_type] + dark_raw

        dark_set = set(dark_raw)
        all_set = set(combined_raw)

        dark_sorted = sorted(dark_set)
        all_sorted = sorted(all_set)

        print(gene_type)
        print('all: ' + str(len(all_sorted)))
        print('dark: ' + str(len(dark_sorted)))

        # number of dark genes missing from the merged list
        print(len(dark_set - all_set))

        gene_info[gene_type] = {'all': all_sorted, 'dark': dark_sorted}

        print('\n\n')

    json_scripts.save_to_json(gene_info,
                              '../grant_pois/gene_info_with_dark.json',
                              indent='indent')
def load_grants_per_gene():
    '''
    Convert the grants-per-gene table into a nested dictionary saved as JSON.

    Reads the tab-separated file
    'andrew_data/grantspergene_weighted_standardized.txt'. The first line
    gives the column names. Every following line is keyed by its first field
    (the gene name) and maps each remaining column name to the float value in
    that column. The result is written to 'andrew_data/grants_per_gene.json'.
    '''
    import json_scripts

    with open('andrew_data/grantspergene_weighted_standardized.txt',
              'r') as handle:
        lines = handle.readlines()

    # column titles come from the header row
    header = lines[0].strip().split('\t')

    grant_gene = {}

    for raw in lines[1:]:
        fields = raw.strip().split('\t')

        # the first column is the gene name, the rest are numeric values
        per_gene = {}
        for col in range(1, len(header)):
            per_gene[header[col]] = float(fields[col])

        grant_gene[fields[0]] = per_gene

    json_scripts.save_to_json(grant_gene, 'andrew_data/grants_per_gene.json',
                              'indent')
def make_protein_dictionary():
    '''
    Gather the proteins-of-interest lists into one dictionary and save it.

    Calls load_names for 'kinase', 'gpcr' and 'ion_channel' (in that order)
    and writes the resulting {type: names} dictionary to
    'proteins_of_interest/proteins_of_interest.json' for later use.
    '''
    print('-- generate dictionary with protein names')

    import json_scripts

    protein_types = ('kinase', 'gpcr', 'ion_channel')
    poi = dict((kind, load_names(kind)) for kind in protein_types)

    json_scripts.save_to_json(
        poi, 'proteins_of_interest/proteins_of_interest.json',
        indent='indent')
def main():
    '''
    Turn the example gene list into a JSON file.

    Reads 'example_gene_50.txt', strips and upper-cases every line, removes
    duplicates, prints how many unique names remain and writes
    {'genes': [...]} to 'example_gene_50.json'.
    '''
    import json_scripts

    with open('example_gene_50.txt', 'r') as handle:
        raw_genes = handle.readlines()

    # clean the names and drop duplicates in one pass
    unique_genes = list(set(gene.strip().upper() for gene in raw_genes))

    print(len(unique_genes))

    json_scripts.save_to_json({'genes': unique_genes},
                              'example_gene_50.json', 'no_indent')
def construct_array():
    '''
    Build the assay x cell-line perturbation-count matrices and save them.

    Loads 'LDR/LDR_api.json' (a list of datasets) and 'as_cl_dict.json' (the
    long-name -> short-name dictionaries for assays 'as' and cell lines
    'cl'). Rows are the sorted unique assay short names; columns are the
    sorted unique cell-line short names plus 'cell-free'.

    For every dataset, every kept cell line and every perturbagen name:
      * a name containing 'compounds' but not 'among' is read as
        '<count> compounds ...' and contributes <count>; any other name
        contributes 1 (a parsed count of 0 also contributes 1);
      * the contribution is added to 'mat' and to rl['t'] when the dataset's
        'released' equals True, otherwise to rl['f'];
      * a {'name', 'release', '_id'} record is appended to 'perts' under the
        key str((assay, cell_line)).

    Cell lines without a 'name', or whose name contains 'TBD among', are
    skipped. Datasets whose assay is 'KINOMEscan' also get the 'cell-free'
    cell line.

    The dictionary {'nodes', 'mat', 'rl', 'perts'} is written to
    'ldr_mat.json'.
    '''
    import json_scripts

    # numpy.zeros replaces the scipy.zeros alias, which was deprecated and is
    # absent from current SciPy releases
    import numpy as np

    print('\nconstructing array\n')

    # load the LDR data in json format
    ldr = json_scripts.load_to_dict('LDR/LDR_api.json')

    # load cl and as dictionary
    as_cl_dict = json_scripts.load_to_dict('as_cl_dict.json')

    # get nodes from 'short name' dictionary values
    nodes = {}
    nodes['as'] = sorted(set(as_cl_dict['as'].values()))

    # 'cell-free' is appended after de-duplication, exactly as before
    nodes['cl'] = sorted(list(set(as_cl_dict['cl'].values())) + ['cell-free'])

    shape = [len(nodes['as']), len(nodes['cl'])]

    # total counts, plus counts split by released (t) / not released (f)
    mat = np.zeros(shape)
    rl = {'t': np.zeros(shape), 'f': np.zeros(shape)}

    # perturbation details for every assay / cell line combination
    perts = {}

    for inst_ldr in ldr:

        # get the assay: put the dataset name through the dictionary
        inst_as = as_cl_dict['as'][inst_ldr['datasetName'].strip()]

        # get the cell line(s)
        inst_cls = []
        for inst_cl in inst_ldr['metadata']['cellLines']:
            if 'name' not in inst_cl:
                continue
            cl_name = inst_cl['name'].strip()
            #!! remove cell line 'TBD among cell ...'
            if 'TBD among' not in cl_name:
                inst_cls.append(as_cl_dict['cl'][cl_name])

        # get the perturbations
        inst_pts = [
            inst_pt['name'].strip()
            for inst_pt in inst_ldr['metadata']['perturbagens']
        ]

        # if the assay is kinomescan then set cell line to 'cell-free'
        if inst_as == 'KINOMEscan':
            inst_cls.append('cell-free')

        index_as = nodes['as'].index(inst_as)

        for inst_cl in inst_cls:

            index_cl = nodes['cl'].index(inst_cl)

            # JSON keys must be strings, so the pair is stored as its str()
            inst_tuple = str((inst_as, inst_cl))

            for inst_pt in inst_pts:

                # check if the name stands for several perturbations
                if 'compounds' in inst_pt and 'among' not in inst_pt:
                    mult_pts = int(inst_pt.split(' ')[0])
                else:
                    mult_pts = 0

                # a single perturbation (or a parsed count of 0) counts once
                weight = mult_pts if mult_pts != 0 else 1

                mat[index_as, index_cl] = mat[index_as, index_cl] + weight

                # track number of released / not released
                status = 't' if inst_ldr['released'] == True else 'f'
                rl[status][index_as, index_cl] = (
                    rl[status][index_as, index_cl] + weight)

                # keep track of perturbation information
                perts.setdefault(inst_tuple, []).append({
                    'name': inst_pt,
                    'release': inst_ldr['released'],
                    '_id': inst_ldr['_id'],
                })

    # check perts dictionary
    print('perts dictionary - the number of found as/cl combinations')
    print(len(perts.keys()))

    # numpy arrays are converted to nested lists so they can be serialized
    ldr_mat = {
        'nodes': nodes,
        'mat': mat.tolist(),
        'rl': {'t': rl['t'].tolist(), 'f': rl['f'].tolist()},
        'perts': perts,
    }

    json_scripts.save_to_json(ldr_mat, 'ldr_mat.json', 'no-indent')
def assay_cl_dict():
    '''
    Build the long-name -> short-name dictionaries for assays and cell lines.

    Reads 'LDR/assays_and_cl_lists_for_Avi-AM.txt'. A line containing
    'assays:' switches to the assay section ('as') and a line containing
    'cell lines:' switches to the cell-line section ('cl'). Within a section,
    a line with a tab is read as '<short name>\\t<long name>' and stored as
    names[section][long] = short; any other non-empty line is stored as its
    own short name. Lines before the first section marker are ignored. The
    result is written to 'as_cl_dict.json'.
    '''
    import json_scripts

    with open('LDR/assays_and_cl_lists_for_Avi-AM.txt', 'r') as handle:
        raw_lines = handle.readlines()

    names = {'as': {}, 'cl': {}}

    # current section: '' until a marker line has been seen
    section = ''

    for raw in raw_lines:
        text = raw.strip()

        if 'assays:' in text:
            section = 'as'
        if 'cell lines:' in text:
            section = 'cl'

        # nothing is recorded before the first section marker
        if section not in names:
            continue

        # NOTE(review): the marker line itself is processed below, so a
        # marker without a tab ends up as an entry of its section - confirm
        # whether that is intended
        if '\t' in text:
            # short name first, long name second
            parts = text.split('\t')
            names[section][parts[1]] = parts[0]
        elif text:
            # no short name: the long name maps to itself
            names[section][text] = text

    json_scripts.save_to_json(names, 'as_cl_dict.json', 'indent')
def write_json_single_value(nodes, clust_order, mat, full_path, row_class={},
                            col_class={}, link_hl={}):
    '''
    Write a clustergram (nodes and links) to a JSON file.

    nodes       - dict with 'row' and 'col' lists of node names
    clust_order - dict with 'clust', 'rank' and 'group' entries, each holding
                  'row' and 'col' data; 'group' is indexed by the distance
                  cutoffs 0.0, 0.1, ..., 1.0
    mat         - matrix indexed as mat[i, j] (row i, column j)
    full_path   - path handed to json_scripts.save_to_json
    row_class   - optional {row name: class}; when empty no 'class' is added
    col_class   - optional {column name: class}; when empty no 'class' is
                  added
    link_hl     - optional {column name: container of row names}; matching
                  links get 'highlight' set to 1

    Every node receives 'name', 'clust', 'rank', 'group' and a placeholder
    'value' drawn from the module-level random generator, which is re-seeded
    with a fixed seed here. A link is written for every matrix entry whose
    absolute value is greater than zero.

    Raises KeyError when a non-empty class dictionary lacks a node name.
    The dictionary defaults are never mutated.
    '''
    import random
    import json_scripts
    import d3_clustergram

    # initialize dict
    d3_json = d3_clustergram.ini_d3_json()

    # distance cutoffs used as keys into clust_order['group']
    all_dist = [float(i) / 10 for i in range(11)]

    #!! generate tmp values with a fixed seed so output is reproducible
    random.seed(122341)

    def add_nodes(axis, classes):
        # append one dictionary per node of the given axis ('row' or 'col')
        for i, name in enumerate(nodes[axis]):
            inst_dict = {}
            inst_dict['name'] = name
            inst_dict['clust'] = clust_order['clust'][axis].index(i)
            inst_dict['rank'] = clust_order['rank'][axis][i]

            # group membership at every distance cutoff
            inst_dict['group'] = [
                float(clust_order['group'][axis][inst_dist][i])
                for inst_dist in all_dist
            ]

            # placeholder value for the bar
            inst_dict['value'] = random.random()

            # class information is optional: with the empty default the
            # lookup used to raise KeyError for every node, so it is only
            # done when a class dictionary was supplied (same guard style as
            # link_hl below)
            if classes:
                inst_dict['class'] = classes[name]

            d3_json[axis + '_nodes'].append(inst_dict)

    # rows first, then columns, so the random sequence is consumed in the
    # same order as before
    add_nodes('row', row_class)
    add_nodes('col', col_class)

    # links - generate edge list
    for i, row_name in enumerate(nodes['row']):
        for j, col_name in enumerate(nodes['col']):

            inst_value = mat[i, j]
            if not abs(inst_value) > 0:
                continue

            # highlight when the row is listed under the column in link_hl
            highlight = 0
            if len(link_hl) > 0 and col_name in link_hl:
                if row_name in link_hl[col_name]:
                    highlight = 1

            d3_json['links'].append({
                'source': i,
                'target': j,
                'value': inst_value,
                'highlight': highlight,
            })

    # write json
    json_scripts.save_to_json(d3_json, full_path, 'noindent')
def add_grant_num_to_clust():
    '''
    Append a grants column to the saved matrix and mark genes with grants.

    Loads 'andrew_data/cumul_probs.json', appends one column of zeros to
    'data_mat', then, for every gene of 'andrew_data/grants_per_gene.json'
    that is also a row of the matrix, writes the placeholder value 1 into
    column 82. The updated matrix is saved back to
    'andrew_data/cumul_probs.json'; the column name list is not changed here.

    NOTE(review): the value written is a placeholder ("fake data") and the
    column index is hard-coded; the original comments suggest the real value
    is 'CumulProbWeightSum' in the last column - confirm before relying on
    the output.
    '''
    import json_scripts
    import numpy as np

    # hard-coded target column kept from the original implementation
    grant_col = 82

    print('\n-----------------\nadding grant numbers\n-----------------\n')

    # load json of Andrew data
    data_json = json_scripts.load_to_dict('andrew_data/cumul_probs.json')
    row_names = data_json['nodes']['row']

    print('\nthere are ' + str(len(row_names)) + ' genes in total')
    print('there are ' + str(len(data_json['nodes']['col'])) +
          ' resources in total\n')

    data_mat = np.asarray(data_json['data_mat'])

    print('data_mat shape')
    print(data_mat.shape)

    print('\ngoing to add grants per gene as a column into the harmonogram\n')

    # column of zeros to append (numpy.zeros replaces the scipy.zeros alias,
    # which is absent from current SciPy releases)
    extra_col = np.zeros([len(row_names), 1])

    print('extra col shape')
    print(extra_col.shape)
    print(extra_col)

    # add the column using hstack
    data_mat = np.hstack((data_mat, extra_col))

    print('data_mat shape after adding in extra column')
    print(data_mat.shape)

    print('there are ' + str(len(data_json['nodes']['col'])) +
          ' resources in total after adding grants per gene\n')

    # load grants_per_gene data
    grants_gene = json_scripts.load_to_dict('andrew_data/grants_per_gene.json')

    # first row index of every gene; replaces a list scan per gene
    row_lookup = {}
    for position, gene in enumerate(row_names):
        row_lookup.setdefault(gene, position)

    # index of the last gene that was found, used by the check below
    last_index = None

    for inst_gene in grants_gene:
        inst_index = row_lookup.get(inst_gene)
        if inst_index is None:
            continue

        ## put in fake data
        data_mat[inst_index, grant_col] = 1
        last_index = inst_index

    print('\n-------------\nchecking data_mat\n----------------\n')
    print(len(data_mat[:, -1]))

    # these checks used to fail when no gene matched or the matrix was small
    if last_index is not None:
        print(data_mat[last_index, grant_col])
    if data_mat.shape[0] > 2:
        print(data_mat[2, grant_col])

    # convert data_mat to list and add back to json
    data_json['data_mat'] = data_mat.tolist()

    json_scripts.save_to_json(data_json, 'andrew_data/cumul_probs.json',
                              'no_indent')
def load_andrew_data():
    '''
    Convert Andrew's gene x dataset table into 'nodes' + 'data_mat' JSON.

    Steps:
      1. regenerate the resource-class and resource-real-name JSON files
         (load_resource_classes, load_resource_real_names);
      2. load 'andrew_data/gene_dataset_cumulprobs_20150609.json', a list of
         {'label', 'entries'} dictionaries whose first element lists the
         resource names and whose remaining elements are genes;
      3. keep only resources present in 'resource_real_names.json' and place
         each value in the column of the resource's real name;
      4. fill the 'Grants_Per_Gene' column with 'CumulProbWeightSum' from
         'andrew_data/grants_per_gene.json' for genes that are rows;
      5. save {'nodes': {'row', 'col'}, 'data_mat'} to
         'andrew_data/cumul_probs.json'.

    Raises ValueError when no resource has the real name 'Grants_Per_Gene'.
    A gene row with fewer entries than the header leaves the missing cells at
    zero; entries beyond the header are ignored.
    '''
    import json_scripts
    import numpy as np

    # load resource classes
    load_resource_classes()

    # load resource mapping names
    load_resource_real_names()

    # load Andrew's data
    matrix = json_scripts.load_to_dict(
        'andrew_data/gene_dataset_cumulprobs_20150609.json')

    # load grants_per_gene data
    grants_gene = json_scripts.load_to_dict('andrew_data/grants_per_gene.json')

    # only keep the resources with real names
    rn = json_scripts.load_to_dict('resource_real_names.json')

    print('\nstarting to process data')

    # rows are genes, columns are the real names of the good resources;
    # list() is required because dict views have no .index and cannot be
    # serialized to JSON
    nodes = {'row': [], 'col': list(rn.values())}

    # save the column index of grants per gene
    col_index_grant = nodes['col'].index('Grants_Per_Gene')

    # first column index of every real name; replaces a list scan per cell
    col_lookup = {}
    for position, real_name in enumerate(nodes['col']):
        col_lookup.setdefault(real_name, position)

    # the first element of matrix describes the columns, not a gene
    num_rows = len(matrix) - 1

    # numpy.zeros replaces the scipy.zeros alias, which is absent from
    # current SciPy releases
    data_mat = np.zeros([num_rows, len(rn)])

    print('\n---------------\nadding original data to matrix\n----------------')

    # resource (column) names of the source table
    all_res = matrix[0]['entries']

    # (source column, target column) for every resource with a real name
    kept_columns = [
        (source, col_lookup[rn[resource]])
        for source, resource in enumerate(all_res)
        if resource in rn
    ]

    for matrix_index, inst_row in enumerate(matrix[1:]):

        nodes['row'].append(inst_row['label'])

        inst_entries = inst_row['entries']
        num_entries = len(inst_entries)

        for source, target in kept_columns:
            if source < num_entries:
                data_mat[matrix_index, target] = inst_entries[source]

    print('\n---------------\nadding grants to matrix\n----------------')

    # first row index of every gene; replaces a list scan per gene
    row_lookup = {}
    for position, gene in enumerate(nodes['row']):
        row_lookup.setdefault(gene, position)

    for inst_gene in grants_gene:
        inst_index = row_lookup.get(inst_gene)
        if inst_index is not None:
            data_mat[inst_index, col_index_grant] = (
                grants_gene[inst_gene]['CumulProbWeightSum'])

    # numpy arrays are converted to nested lists so they can be serialized
    inst_dict = {'nodes': nodes, 'data_mat': data_mat.tolist()}

    json_scripts.save_to_json(inst_dict, 'andrew_data/cumul_probs.json',
                              'no_indent')
def generate_d3_json():
    '''
    Cluster each gene class and save one visualization JSON per class.

    Loads 'andrew_data/cumul_probs.json' ('nodes' and 'data_mat'), the gene
    classes ('gene_classes_harmonogram.json') and the resource classes
    ('resource_classes_harminogram.json'). For every gene class the rows
    whose name is in that class are selected, clustered with
    d3_clustergram.cluster_row_and_column(..., 'cosine') and converted with
    d3_clustergram.d3_clust_single_value. Each column node then gets the
    'data_group' of its resource (spaces replaced by underscores) and each
    link gets 'info' = 1 when its target column is 'Grants_Per_Gene', else 0.
    The result is saved to 'static/networks/<class>_cumul_probs.json'.

    The per-class matrix is always two-dimensional (selected rows x all
    columns), including when a class matches a single gene.
    '''
    import json_scripts
    import d3_clustergram
    import numpy as np

    print('loading json in generate_d3_json')

    # load saved json of andrew data
    data_json = json_scripts.load_to_dict('andrew_data/cumul_probs.json')

    # get nodes and data_mat
    nodes = data_json['nodes']
    data_mat = np.asarray(data_json['data_mat'])

    print(nodes['col'])
    print(data_mat.shape)

    print('calculating clustering orders')

    # gene class
    gc = json_scripts.load_to_dict('gene_classes_harmonogram.json')
    # resource class
    rc = json_scripts.load_to_dict('resource_classes_harminogram.json')

    for inst_class in gc:

        print(inst_class + '\n')

        # indices of the rows (genes) that belong to this class
        class_genes = gc[inst_class]
        member_rows = [
            i for i, inst_gs in enumerate(nodes['row'])
            if inst_gs in class_genes
        ]

        # class_nodes for export
        class_nodes = {
            'col': nodes['col'],
            'row': [nodes['row'][i] for i in member_rows],
        }

        # class_mat is the subset of data_mat that only has genes of one
        # class, e.g. kinases; one indexing step replaces the repeated
        # vstack, which re-copied the matrix for every added row
        class_mat = data_mat[np.asarray(member_rows, dtype=int), :]

        # cluster the matrix, return clust_order
        clust_order = d3_clustergram.cluster_row_and_column(
            class_nodes, class_mat, 'cosine')

        print('generating d3 json')

        # generate d3_clust json
        d3_json = d3_clustergram.d3_clust_single_value(
            class_nodes, clust_order, class_mat)

        # add the resource class (data_group) to every column node
        for inst_col in d3_json['col_nodes']:
            inst_res = inst_col['name']
            inst_col['data_group'] = rc[inst_res]['data_group'].replace(
                ' ', '_')

        # flag grant links: this is used to color the grant links externally
        # from the d3_clustergram code
        for inst_link in d3_json['links']:
            target_name = d3_json['col_nodes'][inst_link['target']]['name']
            inst_link['info'] = 1 if target_name == 'Grants_Per_Gene' else 0

        print('saving to disk')

        # save visualization json
        json_scripts.save_to_json(
            d3_json, 'static/networks/' + inst_class + '_cumul_probs.json',
            'no_indent')
def write_json_single_value(nodes, clust_order, LDR, full_path, perts,
                            row_class={}, col_class={}, link_hl={}):
    '''
    Write the LDR clustergram (nodes and links) to a JSON file.

    nodes       - dict with 'row' and 'col' lists of node names
    clust_order - dict with 'clust' and 'rank' entries, each holding 'row'
                  and 'col' data
    LDR         - dict with 'mat' (count matrix) and 'rl' (dict with 't' and
                  'f' matrices); all are indexed as m[i, j]
    full_path   - path handed to json_scripts.save_to_json
    perts       - dict keyed by (row name, column name) tuples
    row_class, col_class, link_hl - accepted but not used by this variant

    Every node gets 'name', 'clust' and 'rank'. A link is written for every
    entry of 'mat' whose absolute value is greater than zero and carries
    'value', 'value_up' (from rl['t']), 'value_dn' (negated rl['f']) and
    'perts'. The module-level random generator is re-seeded with a fixed
    seed as a side effect.
    '''
    import json
    import json_scripts
    import d3_clustergram
    import random

    print(perts.keys())

    #!! special case, encode extra released information for LDR
    mat = LDR['mat']
    rl = LDR['rl']

    print('\n\nchecking rl\n\n')

    # initialize dict
    d3_json = d3_clustergram.ini_d3_json()

    # kept for its side effect on the shared random generator
    random.seed(122341)

    # rows first, then columns
    for axis in ('row', 'col'):
        for i, name in enumerate(nodes[axis]):
            d3_json[axis + '_nodes'].append({
                'name': name,
                'clust': clust_order['clust'][axis].index(i),
                'rank': clust_order['rank'][axis][i],
            })

    # links - generate edge list
    for i, row_name in enumerate(nodes['row']):
        for j, col_name in enumerate(nodes['col']):

            # only non-zero entries become links
            if not abs(mat[i, j]) > 0:
                continue

            d3_json['links'].append({
                'source': i,
                'target': j,
                'value': mat[i, j],
                # !! custom change for LDRgram
                'value_up': rl['t'][i, j],
                'value_dn': -rl['f'][i, j],
                # perturbation information for this assay / cell line pair
                'perts': perts[(row_name, col_name)],
            })

    # write json
    json_scripts.save_to_json(d3_json, full_path, 'indent')
def write_json_single_value(nodes, clust_order, LDR, full_path, perts,
                            row_class={}, col_class={}, link_hl={}):
    '''
    Export the LDR clustergram as JSON.

    Arguments:
      nodes       - {'row': [...], 'col': [...]} node names
      clust_order - {'clust': {...}, 'rank': {...}}, each with 'row' and
                    'col' data
      LDR         - {'mat': count matrix, 'rl': {'t': ..., 'f': ...}}; the
                    matrices are indexed as m[i, j]
      full_path   - destination passed to json_scripts.save_to_json
      perts       - dictionary keyed by (row name, column name) tuples
      row_class, col_class, link_hl - present in the signature but unused

    Output: node dictionaries with 'name', 'clust' and 'rank', and one link
    per entry of 'mat' with absolute value above zero, holding 'source',
    'target', 'value', 'value_up', 'value_dn' and 'perts'. As a side effect
    the module-level random generator is seeded with a fixed value.
    '''
    import json
    import json_scripts
    import d3_clustergram

    print(perts.keys())

    #!! special case, encode extra released information for LDR
    mat = LDR['mat']

    # get release data
    rl = LDR['rl']

    print('\n\nchecking rl\n\n')

    # initialize dict
    d3_json = d3_clustergram.ini_d3_json()

    import random
    random.seed(122341)

    def node_entry(axis, position):
        # describe one row or column node
        entry = {}
        entry['name'] = nodes[axis][position]
        entry['clust'] = clust_order['clust'][axis].index(position)
        entry['rank'] = clust_order['rank'][axis][position]
        return entry

    # append row dicts, then col dicts
    for position in range(len(nodes['row'])):
        d3_json['row_nodes'].append(node_entry('row', position))

    for position in range(len(nodes['col'])):
        d3_json['col_nodes'].append(node_entry('col', position))

    # links - generate edge list
    num_rows = len(nodes['row'])
    num_cols = len(nodes['col'])

    for i in range(num_rows):
        for j in range(num_cols):
            if abs(mat[i, j]) > 0:
                link = {}
                link['source'] = i
                link['target'] = j
                link['value'] = mat[i, j]

                # !! custom change for LDRgram: released counts go up,
                # unreleased counts go down
                link['value_up'] = rl['t'][i, j]
                link['value_dn'] = -rl['f'][i, j]

                # add perturbation information
                pair = (nodes['row'][i], nodes['col'][j])
                link['perts'] = perts[pair]

                d3_json['links'].append(link)

    # write json
    json_scripts.save_to_json(d3_json, full_path, 'indent')