def add_mutations(cl_info):
    '''
    Annotate each cell line in cl_info with per-mutation status flags.

    Mutation data is read from the 'muts' entry of 'cell_line_muts.json'. For
    every mutation listed there, a 'mut-<mutation>' key holding the string
    'true' or 'false' is stored on each cell line record. cl_info is updated
    in place and is also returned.
    '''
    print('add mutations\n')

    from clustergrammer import Network

    loader = Network()
    mutation_data = loader.load_json_to_dict('cell_line_muts.json')
    muts_to_cls = mutation_data['muts']

    for full_name in cl_info:

        # plexed names are looked up by the part before the first underscore
        short_name = (
            full_name.split('_')[0] if '_plex_' in full_name else full_name
        )

        for mut_name in muts_to_cls:
            flag = 'true' if short_name in muts_to_cls[mut_name] else 'false'
            flag_key = 'mut-' + mut_name

            # the flag is stored under the full name (plex part included)
            cl_info[full_name][flag_key] = flag

    return cl_info
def add_mutations(cl_info):
    '''
    Add 'mut-<mutation>' flags to every cell line record in cl_info.

    The mutations come from the 'muts' entry of 'cell_line_muts.json'. Each
    flag is the string 'true' when the (plex-stripped) cell line name is found
    in that mutation's entry and 'false' otherwise. The same cl_info object is
    modified and returned.
    '''
    print('add mutations\n')

    from clustergrammer import Network

    net = Network()
    muts_by_name = net.load_json_to_dict('cell_line_muts.json')['muts']

    for cl_key in cl_info:

        # drop the plex part of the name when there is one
        base_cl = cl_key
        if '_plex_' in cl_key:
            base_cl = cl_key.split('_')[0]

        for mut in muts_by_name:
            status = 'false'
            if base_cl in muts_by_name[mut]:
                status = 'true'

            title = 'mut-' + mut

            # write back under the original key (which may include the plex)
            cl_info[cl_key][title] = status

    return cl_info
def make_enr_vect_clust():
    '''
    Build the enrichment-vector clustering example file.

    Loads 'json/g2e_enr_vect.json', hands it to
    enrichr_functions.make_enr_vect_clust together with the arguments 0.001
    and 1, and calls write_json_to_file('viz', 'json/enr_vect_example.json')
    on the object that call returns.
    '''
    import enrichr_functions as enr_fun
    from clustergrammer import Network

    # this Network instance is only used to read the input JSON
    json_loader = Network()
    enr_vect_post = json_loader.load_json_to_dict('json/g2e_enr_vect.json')

    clustered = enr_fun.make_enr_vect_clust(enr_vect_post, 0.001, 1)
    clustered.write_json_to_file('viz', 'json/enr_vect_example.json')
def cluster():
    '''
    Cluster the vector post stored in 'fake_vect_post.json'.

    The post is loaded into a Network, swap_nan_for_zero is applied, make_clust
    is run with dist_type 'cos', the 'N_row_sum' and 'N_row_var' views and
    dendro=True, and the result is written through
    write_json_to_file('viz', 'json/large_vect_post_example.json', 'indent').
    '''
    from clustergrammer import Network

    net = Network()

    post_data = net.load_json_to_dict('fake_vect_post.json')
    net.load_vect_post_to_net(post_data)

    net.swap_nan_for_zero()

    # net.N_top_views()

    clust_kwargs = {
        'dist_type': 'cos',
        'views': ['N_row_sum', 'N_row_var'],
        'dendro': True,
    }
    net.make_clust(**clust_kwargs)

    out_path = 'json/large_vect_post_example.json'
    net.write_json_to_file('viz', out_path, 'indent')
def make_plex_matrix():
    '''
    Make a cell line matrix with plex rows and cell line columns. This will be
    used as a negative control that should show worsening correlation as data
    is normalized/filtered.

    Rows are labelled 1 through 9 and columns are the columns of the matrix
    loaded from 'CCLE_NSCLC_all_genes.txt'. An entry is set to 1 when the
    column name is a substring of a cl_info key whose 'Plex' value equals the
    row label; 'Plex' values of -1 are skipped. The matrix is written as a
    tab-separated file to
    '../lung_cellline_3_1_16/lung_cl_all_ptm/precalc_processed/exp-plex.txt'.

    Raises:
      ValueError: if a 'Plex' value is neither -1 nor one of the row labels.
    '''
    import numpy as np
    import pandas as pd
    from clustergrammer import Network

    # load cl_info
    net = Network()
    cl_info = net.load_json_to_dict(
        '../cell_line_info/cell_line_info_dict.json')

    # load cell line expression
    net.load_file('../CCLE_gene_expression/CCLE_NSCLC_all_genes.txt')
    tmp_df = net.dat_to_df()
    df = tmp_df['mat']

    cols = df.columns.tolist()

    # row labels are the plex numbers 1..9
    rows = list(range(1, 10))

    print(rows)

    mat = np.zeros((len(rows), len(cols)))

    # enumerate gives every column its own position: no repeated linear
    # cols.index() search in the inner loop, and duplicated column names no
    # longer all resolve to the first occurrence
    for col_index, inst_col in enumerate(cols):

        for inst_cl in cl_info:

            # substring match: cl_info keys may carry extra text around the
            # column name
            if inst_col not in inst_cl:
                continue

            inst_plex = int(cl_info[inst_cl]['Plex'])

            # -1 entries are skipped (presumably "no plex assigned" - confirm)
            if inst_plex == -1:
                continue

            # rows.index raises ValueError for an unexpected plex number
            row_index = rows.index(inst_plex)

            mat[row_index, col_index] = 1

    df_plex = pd.DataFrame(data=mat, columns=cols, index=rows)

    filename = ('../lung_cellline_3_1_16/lung_cl_all_ptm/precalc_processed/'
                'exp-plex.txt')
    df_plex.to_csv(filename, sep='\t')
def make_plex_matrix():
    '''
    Make a cell line matrix with plex rows and cell line columns. This will be
    used as a negative control that should show worsening correlation as data
    is normalized/filtered.

    The rows carry the labels 1 through 9; the columns are those of the matrix
    loaded from 'CCLE_NSCLC_all_genes.txt'. A cell is set to 1 when its column
    name occurs inside a cl_info key whose 'Plex' value matches the row label
    ('Plex' values of -1 are ignored). The result is saved tab-separated to
    '../lung_cellline_3_1_16/lung_cl_all_ptm/precalc_processed/exp-plex.txt'.

    Raises:
      ValueError: if a 'Plex' value is neither -1 nor one of the row labels.
    '''
    import numpy as np
    import pandas as pd
    from clustergrammer import Network

    # load cl_info
    net = Network()
    cl_info = net.load_json_to_dict(
        '../cell_line_info/cell_line_info_dict.json')

    # load cell line expression
    net.load_file('../CCLE_gene_expression/CCLE_NSCLC_all_genes.txt')
    df = net.dat_to_df()['mat']

    cols = df.columns.tolist()

    # plex numbers 1..9 label the rows
    rows = list(range(1, 10))
    print(rows)

    mat = np.zeros((len(rows), len(cols)))

    # the column position comes from enumerate instead of a cols.index()
    # search per match; besides being cheaper, this keeps duplicated column
    # names from all being written to the first occurrence
    for col_index, inst_col in enumerate(cols):
        for inst_cl in cl_info:

            # column names are matched as substrings of the cl_info keys
            if inst_col in inst_cl:
                inst_plex = int(cl_info[inst_cl]['Plex'])

                # -1 is skipped (presumably marks "no plex" - confirm)
                if inst_plex != -1:
                    # unexpected plex numbers raise ValueError here
                    mat[rows.index(inst_plex), col_index] = 1

    df_plex = pd.DataFrame(data=mat, columns=cols, index=rows)

    filename = ('../lung_cellline_3_1_16/lung_cl_all_ptm/precalc_processed/'
                'exp-plex.txt')
    df_plex.to_csv(filename, sep='\t')
def post_to_clustergrammer():
    '''
    POST the contents of 'test_vector_upload.json', serialized with
    json.dumps, to the local clustergrammer vector_upload URL and print the
    text of the response.
    '''
    from clustergrammer import Network
    import requests
    import json

    # alternative target:
    # 'http://amp.pharm.mssm.edu/clustergrammer/vector_upload/'
    upload_url = 'http://localhost:9000/clustergrammer/vector_upload/'

    # alternative input: 'fake_vect_post.json'
    loader = Network()
    payload = json.dumps(loader.load_json_to_dict('test_vector_upload.json'))

    response = requests.post(upload_url, data=payload)

    # the response text is printed as the link
    print(response.text)
def cluster():
    '''
    Load 'fake_vect_post.json' into a Network, cluster it and save the result.

    After swap_nan_for_zero, make_clust is called with dist_type='cos',
    views=['N_row_sum', 'N_row_var'] and dendro=True; the output is then
    written with
    write_json_to_file('viz', 'json/large_vect_post_example.json', 'indent').
    '''
    from clustergrammer import Network

    network = Network()
    network.load_vect_post_to_net(
        network.load_json_to_dict('fake_vect_post.json'))
    network.swap_nan_for_zero()

    # network.N_top_views()

    requested_views = ['N_row_sum', 'N_row_var']
    network.make_clust(dist_type='cos', views=requested_views, dendro=True)

    network.write_json_to_file(
        'viz', 'json/large_vect_post_example.json', 'indent')
def post_to_clustergrammer():
    '''
    Upload 'test_vector_upload.json' to the clustergrammer vector_upload URL
    on localhost:9000 via an HTTP POST (body produced by json.dumps) and print
    the response text.
    '''
    from clustergrammer import Network
    import requests
    import json

    upload_url = 'http://localhost:9000/clustergrammer/vector_upload/'
    # upload_url = 'http://amp.pharm.mssm.edu/clustergrammer/vector_upload/'

    net = Network()

    vect_post = net.load_json_to_dict('test_vector_upload.json')
    # vect_post = net.load_json_to_dict('fake_vect_post.json')

    body = json.dumps(vect_post)
    link = requests.post(upload_url, data=body).text

    print(link)
def proc_locally():
    '''
    Process 'large_vect_post.json' locally.

    Prints the keys of the loaded post, loads it into a Network, calls
    swap_nan_for_zero and N_top_views, then prints the keys of net.viz.
    '''
    from clustergrammer import Network
    # import run_g2e_background

    net = Network()

    post_data = net.load_json_to_dict('large_vect_post.json')
    print(post_data.keys())

    # mongo_address = '10.125.161.139'

    net.load_vect_post_to_net(post_data)
    net.swap_nan_for_zero()
    net.N_top_views()

    viz_keys = net.viz.keys()
    print(viz_keys)
def proc_locally():
    '''
    Run the vector-post pipeline on 'large_vect_post.json' without a server.

    The keys of the loaded post are printed first; the post is then loaded
    into the Network, swap_nan_for_zero and N_top_views are called in that
    order, and finally the keys of net.viz are printed.
    '''
    from clustergrammer import Network
    # import run_g2e_background

    network = Network()
    loaded_post = network.load_json_to_dict('large_vect_post.json')

    print(loaded_post.keys())

    # mongo_address = '10.125.161.139'

    network.load_vect_post_to_net(loaded_post)
    network.swap_nan_for_zero()
    network.N_top_views()

    print(network.viz.keys())
def main():
    '''
    Run calc_gene_sim_mat for every gene class on each selected hzome file.

    Gene information is loaded from '../grant_pois/gene_info_with_dark.json'.
    Files whose name contains 'my_' are read with net.load_file and
    net.export_df; all others are read with hzome_to_df.load_matrix and
    deep-copied.
    '''
    net = Network()

    # load genes of interest
    gene_info = net.load_json_to_dict('../grant_pois/gene_info_with_dark.json')

    # ENCODE, GTEx, etc
    # hzome_names = ['my_CCLE_exp.txt', 'ENCODE_TF_targets.txt', 'ChEA_TF_targets.txt']
    hzome_names = ['ENCODE_TF_targets.txt']

    # separate sim_cutoffs for the different files
    cutoffs = {
        'my_CCLE_exp.txt': 0.15,
        'ENCODE_TF_targets.txt': 0.35,  # 0.6
        'ChEA_TF_targets.txt': 0.2,
        'my_gtex_Moshe_2017_exp.txt': 0.2,
    }

    # not used further in this function; the lookup is kept so that a missing
    # 'KIN' / 'all' entry still raises here
    genes_of_class = gene_info['KIN']['all']

    for hzome_name in hzome_names:

        hzome_filename = '../hzome_data/' + hzome_name

        print('loading data ')

        # load hzome data
        ####################
        is_hzome_format = 'my_' not in hzome_name
        if is_hzome_format:
            # load data in hzome format
            hzome_data = deepcopy(hzome_to_df.load_matrix(hzome_filename))
        else:
            # if I am providing the data, then load in normal way
            net.load_file(hzome_filename)
            hzome_data = net.export_df()

        print('data loaded\n')

        for gene_class in gene_info:
            calc_gene_sim_mat(hzome_data, net, gene_info, gene_class,
                              hzome_name, cutoffs)
def main():
    '''
    This will add cell line category information (including plexes and
    gene-expression groups) to the gene expression data from CCLE.

    NOTE: as written, the function only reads the CCLE expression file and
    the cell line info JSON, then creates (or truncates) the output file
    'CCLE_gene_expression/CCLE_NSCLC_cats_all_genes.txt' without writing
    anything to it.
    '''
    from clustergrammer import Network
    net = Network()

    # load original CCLE gene expression data for CST lung cancer cell lines
    filename = 'CCLE_gene_expression/CCLE_NSCLC_all_genes.txt'
    # the with-block closes the file even if reading fails
    with open(filename, 'r') as f:
        lines = f.readlines()

    # load cell line info
    cl_info = net.load_json_to_dict('cell_line_info/cell_line_muts.json')

    # write to new file
    new_file = 'CCLE_gene_expression/CCLE_NSCLC_cats_all_genes.txt'
    # opening in 'w' mode creates/truncates the file; nothing is written yet
    with open(new_file, 'w'):
        pass
def main():
    '''
    This will add cell line category information (including plexes and
    gene-expression groups) to the gene expression data from CCLE.

    NOTE: the current body only loads its two inputs (the CCLE expression
    text file and the cell line info JSON) and then creates or truncates
    'CCLE_gene_expression/CCLE_NSCLC_cats_all_genes.txt'; no content is
    written to that file.
    '''
    from clustergrammer import Network
    net = Network()

    # load original CCLE gene expression data for CST lung cancer cell lines
    filename = 'CCLE_gene_expression/CCLE_NSCLC_all_genes.txt'
    # context manager guarantees the handle is released if readlines raises
    with open(filename, 'r') as f:
        lines = f.readlines()

    # load cell line info
    cl_info = net.load_json_to_dict('cell_line_info/cell_line_muts.json')

    # write to new file
    new_file = 'CCLE_gene_expression/CCLE_NSCLC_cats_all_genes.txt'
    # 'w' mode creates/truncates the output; the body is intentionally empty
    # because nothing is written to the file yet
    with open(new_file, 'w'):
        pass