def make_viz_from_df(df, filename):
    '''
    Cluster a DataFrame and write its visualization JSON to the json/
    directory; the output name comes from the second path component of
    `filename` with the .gct suffix removed.

    NOTE(review): reconstructed from collapsed source - clustering appears
    to run only when the filtered matrix has < 50 columns; confirm intent.
    '''
    from clustergrammer import Network

    net = Network()
    net.df_to_dat(df)
    net.swap_nan_for_zero()

    # zscore first to get the columns distributions to be similar
    net.normalize(axis='col', norm_type='zscore', keep_orig=True)

    # filter the rows to keep the perts with the largest normalized values
    net.filter_N_top('row', 2000)

    num_columns = net.dat['mat'].shape[1]

    if num_columns < 50:
        # views = ['N_row_sum', 'N_row_var']
        views = ['N_row_sum']
        net.make_clust(dist_type='cos', views=views)

        filename = 'json/' + filename.split('/')[1].replace('.gct', '') + '.json'
        net.write_json_to_file('viz', filename)
def make_phos_homepage_viz():
    '''
    Build the homepage phosphoproteomics visualization: quantile-normalize
    the lung cell-line TMT phospho ratios, keep the 250 PTMs with the
    largest ratio sums, z-score rows, threshold-filter, cluster, and write
    the indented JSON to json/homepage_phos.json.
    '''
    from clustergrammer import Network

    network = Network()

    input_path = ('lung_cellline_3_1_16/lung_cellline_phospho/'
                  'lung_cellline_TMT_phospho_combined_ratios.tsv')
    network.load_file(input_path)

    # quantile normalize to make cell-line distributions comparable
    network.normalize(axis='col', norm_type='qn')

    # only keep most differentially regulated PTMs (ranked by sum)
    network.filter_N_top('row', 250, 'sum')

    # z-score each PTM across cell lines
    network.normalize(axis='row', norm_type='zscore', keep_orig=True)
    network.swap_nan_for_zero()

    # threshold filter PTMs
    network.filter_threshold('row', threshold=1.75, num_occur=3)

    network.make_clust(dist_type='cos',
                       views=['N_row_sum', 'N_row_var'],
                       dendro=True,
                       sim_mat=True,
                       calc_cat_pval=True)

    network.write_json_to_file('viz', 'json/homepage_phos.json', 'indent')
def clust_vect(db, viz_doc, vect_post):
    '''
    Cluster a vector-format POST payload and attach the results to viz_doc.

    On success viz_doc['viz'] gets the clustered visualization and
    viz_doc['dat'] the id returned by upload_dat; on any clustering failure
    both are set to the string 'error'.  Returns the (mutated) viz_doc.
    '''
    from clustergrammer import Network

    try:
        net = Network()
        net.load_vect_post_to_net(vect_post)
        net.swap_nan_for_zero()

        views = ['N_row_sum', 'N_row_var']
        net.make_clust(dist_type='cosine', dendro=True, views=views,
                       linkage_type='average')

        dat_id = upload_dat(db, net)

        update_viz = net.viz
        update_dat = dat_id

    # fix: was a bare `except:`, which also swallowed SystemExit and
    # KeyboardInterrupt; catch Exception so those still propagate
    except Exception:
        print('error clustering')
        update_viz = 'error'
        update_dat = 'error'

    viz_doc['viz'] = update_viz
    viz_doc['dat'] = update_dat

    return viz_doc
def make_viz_json(inst_df, name):
    '''Cluster inst_df (no extra views) and write json/<name> without indenting.'''
    from clustergrammer import Network

    net = Network()
    out_path = 'json/' + name

    # df_to_dat expects a dict with the matrix under the 'mat' key
    net.df_to_dat({'mat': inst_df})
    net.swap_nan_for_zero()
    net.make_clust(views=[])

    net.write_json_to_file('viz', out_path, 'no-indent')
def main( buff, inst_filename, mongo_address, viz_id):
    '''
    Cluster an uploaded tsv buffer and store the result in mongo.

    The networks document identified by viz_id is updated with the clustered
    visualization (or the string 'error' on failure), and the exported
    matrix data is inserted into the network_data collection.
    '''
    import numpy as np
    import flask
    from bson.objectid import ObjectId
    from pymongo import MongoClient
    from flask import request
    from clustergrammer import Network
    import StringIO  # NOTE: Python 2 module - this worker predates py3

    client = MongoClient(mongo_address)
    db = client.clustergrammer

    viz_id = ObjectId(viz_id)
    # NOTE(review): find_one returns None when no document matches viz_id,
    # which would make the update below raise - confirm callers always pass
    # a valid id
    found_viz = db.networks.find_one({'_id': viz_id})

    try:
        net = Network()
        net.load_tsv_to_net(buff)
        net.swap_nan_for_zero()

        views = ['N_row_sum', 'N_row_var']
        net.make_clust(dist_type='cosine', dendro=True, views=views,
                       linkage_type='average')

        export_dat = {}
        export_dat['name'] = inst_filename
        export_dat['dat'] = net.export_net_json('dat')
        export_dat['source'] = 'user_upload'

        dat_id = db.network_data.insert(export_dat)

        update_viz = net.viz
        update_dat = dat_id

    # fix: was a bare `except:`, which also swallowed SystemExit and
    # KeyboardInterrupt; catch Exception so those still propagate
    except Exception:
        print('\n-----------------------')
        print('error in clustering')
        print('-----------------------\n')
        update_viz = 'error'
        update_dat = 'error'

    found_viz['viz'] = update_viz
    found_viz['dat'] = update_dat

    db.networks.update_one({'_id': viz_id}, {'$set': found_viz})

    client.close()
def main(buff, inst_filename, mongo_address, viz_id):
    '''
    Cluster an uploaded tsv buffer and store the result in mongo.

    Updates the networks document identified by viz_id with the clustered
    visualization (or 'error' on failure) and inserts the exported matrix
    data into the network_data collection.
    '''
    import numpy as np
    import flask
    from bson.objectid import ObjectId
    from pymongo import MongoClient
    from flask import request
    from clustergrammer import Network
    import StringIO  # NOTE: Python 2 module - this worker predates py3

    client = MongoClient(mongo_address)
    db = client.clustergrammer

    viz_id = ObjectId(viz_id)
    # NOTE(review): find_one returns None when no document matches viz_id -
    # the field assignments below would then raise; verify callers
    found_viz = db.networks.find_one({'_id': viz_id})

    try:
        net = Network()
        net.load_tsv_to_net(buff)
        net.swap_nan_for_zero()

        views = ['N_row_sum', 'N_row_var']
        net.make_clust(dist_type='cosine', dendro=True, views=views,
                       linkage_type='average')

        export_dat = {}
        export_dat['name'] = inst_filename
        export_dat['dat'] = net.export_net_json('dat')
        export_dat['source'] = 'user_upload'

        dat_id = db.network_data.insert(export_dat)

        update_viz = net.viz
        update_dat = dat_id

    # fix: was a bare `except:`, which also swallowed SystemExit and
    # KeyboardInterrupt; catch Exception so those still propagate
    except Exception:
        print('\n-----------------------')
        print('error in clustering')
        print('-----------------------\n')
        update_viz = 'error'
        update_dat = 'error'

    found_viz['viz'] = update_viz
    found_viz['dat'] = update_dat

    db.networks.update_one({'_id': viz_id}, {'$set': found_viz})

    client.close()
def cluster():
    '''
    Load the fake vector-format POST example, cluster it (cosine distance,
    dendrogram, row sum/variance views), and write the indented JSON.
    '''
    from clustergrammer import Network

    net = Network()

    payload = net.load_json_to_dict('fake_vect_post.json')
    net.load_vect_post_to_net(payload)
    net.swap_nan_for_zero()

    net.make_clust(dist_type='cos',
                   views=['N_row_sum', 'N_row_var'],
                   dendro=True)

    net.write_json_to_file('viz', 'json/large_vect_post_example.json', 'indent')
def clustergrammer_load():
    '''
    Load mat_cats.tsv into a Network, cluster it, write the indented JSON
    to json/mult_cats.json, and print the row node_info keys.
    '''
    # import network class from Network.py
    from clustergrammer import Network

    net = Network()
    net.pandas_load_file('mat_cats.tsv')

    net.make_clust(dist_type='cos',
                   views=['N_row_sum', 'N_row_var'])
    net.write_json_to_file('viz', 'json/mult_cats.json', 'indent')

    # show which row metadata was produced
    print('\n**********************')
    print(net.dat['node_info']['row'].keys())
    print('\n\n')
def clustergrammer_load():
    '''Cluster mat_cats.tsv and export json/mult_cats.json (indented).'''
    # import network class from Network.py
    from clustergrammer import Network

    network = Network()
    network.pandas_load_file('mat_cats.tsv')

    view_list = ['N_row_sum', 'N_row_var']
    network.make_clust(dist_type='cos', views=view_list)
    network.write_json_to_file('viz', 'json/mult_cats.json', 'indent')

    # report the row metadata keys that were generated
    print('\n**********************')
    print(network.dat['node_info']['row'].keys())
    print('\n\n')
def build_layout():
    '''
    Build the "Cluster Comparison" dash Tab.

    Loads the hallmark GSVA cluster-comparison matrix, tags each row with a
    cancer type inferred from substrings of the cluster name, clusters the
    matrix with clustergrammer, and wraps the widget in the page layout.

    Returns the dcc.Tab component.
    '''
    df = load_data.load_gsva_compare_cluster('hallmark')

    # Map cluster-name substrings to cancer types.  Order matters: 'PJ030'
    # must be tested before the more general 'PJ' prefix.
    substring_to_cancer = [
        ('PJ030', 'LGG'),
        ('PJ', 'GBM'),
        ('LX', 'LUAD'),
        ('GSE146026', 'OV'),
        ('GSE72056', 'SKCM'),
        ('GSE103322', 'HNSC'),
        ('GSE111672', 'PAAD'),
    ]

    # was defaultdict(lambda: []) - defaultdict(list) is the idiomatic form
    cat_to_true = defaultdict(list)
    for clust in df.index:
        for substring, cancer_type in substring_to_cancer:
            if substring in clust:
                cat_to_true[cancer_type].append(clust)
                break

    cats = [{
        'title': 'Cancer Type',
        # plain dict copy; the previous dict comprehension was a no-op copy
        'cats': dict(cat_to_true)
    }]

    net = Network()
    net.load_df(df)
    net.add_cats('row', cats)
    net.make_clust()

    layout = dcc.Tab(label='Cluster Comparison', children=[
        dbc.Container(
            fluid=True,
            children=[
                html.Link(rel='stylesheet', href='./static/custom.css'),
                dash_clustergrammer.DashClustergrammer(
                    id='cgram-component',
                    label='',
                    network_data=net.viz)
            ])
    ])
    return layout
def cluster():
    '''Cluster the example vector-post payload and export its viz JSON.'''
    from clustergrammer import Network

    net = Network()

    # read the example payload and load it as a network
    vect_post = net.load_json_to_dict('fake_vect_post.json')
    net.load_vect_post_to_net(vect_post)
    net.swap_nan_for_zero()

    view_names = ['N_row_sum', 'N_row_var']
    net.make_clust(dist_type='cos', views=view_names, dendro=True)

    net.write_json_to_file('viz',
                           'json/large_vect_post_example.json',
                           'indent')
def make_exp_homepage_viz():
    '''
    Build the homepage expression visualization from the CCLE NSCLC matrix:
    threshold-filter genes, cluster, and write json/homepage_exp.json.
    '''
    from clustergrammer import Network

    network = Network()
    network.load_file('CCLE_gene_expression/CCLE_NSCLC_all_genes.txt')

    # threshold filter expression
    network.filter_threshold('row', threshold=3.0, num_occur=4)

    network.make_clust(dist_type='cos',
                       views=['N_row_sum', 'N_row_var'],
                       dendro=True,
                       sim_mat=True,
                       calc_cat_pval=False)

    network.write_json_to_file('viz', 'json/homepage_exp.json', 'indent')
def main():
    '''
    Demo driver: read txt/example_tsv_network.txt through a StringIO buffer,
    cluster it, and write json/mult_view.json.  Python 2 script (StringIO).
    '''
    import time
    start_time = time.time()

    import pandas as pd
    import StringIO

    # import network class from Network.py
    from clustergrammer import Network
    net = Network()

    # choose file (swap in 'txt/col_categories.txt' to exercise categories)
    file_buffer = open('txt/example_tsv_network.txt')
    buff = StringIO.StringIO(file_buffer.read())

    net.pandas_load_tsv_to_net(buff)

    # filter rows
    views = ['filter_row_sum', 'N_row_sum']

    # distance metric
    dist_type = 'cosine'
    # linkage type
    linkage_type = 'average'

    net.make_clust(dist_type=dist_type, views=views, calc_col_cats=True,
                   linkage_type=linkage_type)

    net.write_json_to_file('viz', 'json/mult_view.json', 'no-indent')

    elapsed_time = time.time() - start_time
    print('\n\n\nelapsed time: ' + str(elapsed_time))
def main():
    '''Cluster the example tsv network (read via StringIO) and write the
    multi-view JSON; prints elapsed wall-clock time.  Python 2 script.'''
    import time
    t0 = time.time()

    import pandas as pd
    import StringIO

    # import network class from Network.py
    from clustergrammer import Network

    net = Network()

    # read the example matrix through an in-memory buffer
    # (swap in 'txt/col_categories.txt' to test category handling)
    fh = open('txt/example_tsv_network.txt')
    tsv_buffer = StringIO.StringIO(fh.read())
    net.pandas_load_tsv_to_net(tsv_buffer)

    net.make_clust(dist_type='cosine',
                   views=['filter_row_sum', 'N_row_sum'],
                   calc_col_cats=True,
                   linkage_type='average')

    net.write_json_to_file('viz', 'json/mult_view.json', 'no-indent')

    elapsed_time = time.time() - t0
    print('\n\n\nelapsed time: ' + str(elapsed_time))
def make_json_from_tsv(name):
    '''
    Make a clustergrammer json from a tsv file: txt/<name>.txt is normalized,
    filtered, clustered, and written to json/<name>.json.  Matrices that are
    still large after filtering (>= 50 cols and >= 1000 rows) are skipped.
    '''
    from clustergrammer import Network

    print('\n' + name)

    net = Network()
    filename = 'txt/' + name + '.txt'
    net.load_file(filename)

    # fix: removed unused local `df = net.dat_to_df()` (result was discarded)

    net.swap_nan_for_zero()

    # zscore first to get the columns distributions to be similar
    net.normalize(axis='col', norm_type='zscore', keep_orig=True)

    # filter the rows to keep the perts with the largest normalized values
    net.filter_N_top('row', 1000)

    num_rows = net.dat['mat'].shape[0]
    num_cols = net.dat['mat'].shape[1]
    print('num_rows ' + str(num_rows))
    print('num_cols ' + str(num_cols))

    if num_cols < 50 or num_rows < 1000:
        views = ['N_row_sum']
        net.make_clust(dist_type='cos', views=views)

        export_filename = 'json/' + name + '.json'
        net.write_json_to_file('viz', export_filename)
    else:
        print('did not cluster, too many columns ')
def make_viz_from_df(df, filename):
    '''
    Cluster df and write its viz JSON under json/, naming the output after
    the second path component of `filename` minus its .gct extension.

    NOTE(review): reconstructed from collapsed source - clustering appears
    to run only when the filtered matrix has < 50 columns; confirm intent
    for wider matrices.
    '''
    from clustergrammer import Network

    network = Network()
    network.df_to_dat(df)
    network.swap_nan_for_zero()

    # zscore first to get the columns distributions to be similar
    network.normalize(axis='col', norm_type='zscore', keep_orig=True)

    # filter the rows to keep the perts with the largest normalized values
    network.filter_N_top('row', 2000)

    if network.dat['mat'].shape[1] < 50:
        # views = ['N_row_sum', 'N_row_var']
        network.make_clust(dist_type='cos', views=['N_row_sum'])

        out_name = filename.split('/')[1].replace('.gct','')
        network.write_json_to_file('viz', 'json/' + out_name + '.json')
# Clustergrammer worker script: read a matrix tsv from stdin, cluster it,
# and print the unindented viz JSON to stdout.
from clustergrammer import Network
net = Network()

# load matrix tsv file
net.load_stdin()

# optional filtering and normalization
##########################################
# net.filter_sum('row', threshold=20)
# net.normalize(axis='col', norm_type='zscore', keep_orig=True)
# net.filter_N_top('row', 250, rank_type='sum')
# net.filter_threshold('row', threshold=3.0, num_occur=4)
# net.swap_nan_for_zero()

# cluster with cosine distance; build row-sum and row-variance views
net.make_clust(dist_type='cos', views=['N_row_sum', 'N_row_var'],
               dendro=True, sim_mat=False, filter_sim=0.1,
               calc_cat_pval=False)

# output jsons for front-end visualizations
print(net.export_net_json('viz', 'no-indent'))
# Example script: cluster txt/rc_two_cats.txt and write the main view plus
# the row/column similarity matrices as JSON; reports wall-clock time.
import time
start_time = time.time()

from clustergrammer import Network
net = Network()

net.load_file('txt/rc_two_cats.txt')
# net.load_file('txt/tmp.txt')

views = ['N_row_sum', 'N_row_var']

net.make_clust(dist_type='cos', views=views, dendro=True, sim_mat=True)

# write the visualization JSON and both similarity-matrix JSONs
net.write_json_to_file('viz', 'json/mult_view.json')
net.write_json_to_file('sim_row', 'json/mult_view_sim_row.json')
net.write_json_to_file('sim_col', 'json/mult_view_sim_col.json')

elapsed_time = time.time() - start_time
print('\n\nelapsed time')
print(elapsed_time)
# Remove names for clustergrammer gene_attribute_matrix.index.name = "" gene_attribute_matrix.columns.name = "" # Write to file # fp = StringIO() # gene_attribute_matrix.to_csv(fp, sep='\t') gene_attribute_matrix.to_csv('tmp.txt', sep='\t') # Custergrammer from clustergrammer import Network net = Network() # net.load_tsv_to_net(fp, name) # StringIO net.load_file('tmp.txt') net.swap_nan_for_zero() # Generate net.make_clust(dist_type='cos',views=['N_row_sum', 'N_row_var'], dendro=True, sim_mat=True, filter_sim=0.1, calc_cat_pval=False) # Insert into database cur.execute('insert into `datasets` (`Name`, `prot_att`, `att_att`, `prot_prot`) values (?, ?, ?, ?)', (name, net.export_net_json('viz', indent='no-indent'), net.export_net_json('sim_col', indent='no-indent'), net.export_net_json('sim_row', indent='no-indent'))) con.commit() except Exception as e: print "Couldn't process %s (%s)" % (name, e) continue print "Processed %s" % (name) con.close()
def clust_from_response(response_list):
    '''
    Build a clustergrammer network (genes x enriched terms) from an Enrichr
    response list and attach ranked top-N sub-views for each score type
    (combined_score, -log pval, -zscore).  Returns the Network.
    '''
    from clustergrammer import Network
    import scipy
    import json
    import pandas as pd
    import math
    from copy import deepcopy

    # print('----------------------')
    # print('enrichr_clust_from_response')
    # print('----------------------')

    ini_enr = transfer_to_enr_dict(response_list)

    enr = []
    scores = {}
    score_types = ['combined_score', 'pval', 'zscore']

    # one Series per score type, indexed by enriched-term name
    for score_type in score_types:
        scores[score_type] = pd.Series()

    for inst_enr in ini_enr:
        if inst_enr['combined_score'] > 0:
            # make series of enriched terms with scores
            for score_type in score_types:
                # collect the scores of the enriched terms
                if score_type == 'combined_score':
                    scores[score_type][inst_enr['name']] = inst_enr[score_type]
                if score_type == 'pval':
                    # -log(pval): larger means more significant
                    scores[score_type][inst_enr['name']] = -math.log(
                        inst_enr[score_type])
                if score_type == 'zscore':
                    # negate so larger is better, like the other scores
                    scores[score_type][
                        inst_enr['name']] = -inst_enr[score_type]
            # keep enrichement values
            enr.append(inst_enr)

    # sort and normalize the scores
    for score_type in score_types:
        scores[score_type] = scores[score_type] / scores[score_type].max()
        # BUG FIX: sort_values returns a new Series; the original discarded
        # the result, so the scores were never actually sorted and the
        # "top N" slices below picked terms in insertion order
        scores[score_type] = scores[score_type].sort_values(ascending=False)

    number_of_enriched_terms = len(scores['combined_score'])

    enr_score_types = ['combined_score', 'pval', 'zscore']

    # choose how many top-N views to build based on how many terms exist
    if number_of_enriched_terms < 10:
        num_dict = {'ten': 10}
    elif number_of_enriched_terms < 20:
        num_dict = {'ten': 10, 'twenty': 20}
    else:
        num_dict = {'ten': 10, 'twenty': 20, 'thirty': 30}

    # gather lists of top scores
    top_terms = {}
    for enr_type in enr_score_types:
        top_terms[enr_type] = {}
        for num_terms in list(num_dict.keys()):
            inst_num = num_dict[num_terms]
            top_terms[enr_type][num_terms] = scores[enr_type].index.tolist(
                )[:inst_num]

    # gather the terms that should be kept - they are at the top of the score list
    keep_terms = []
    for inst_enr_score in top_terms:
        for tmp_num in list(num_dict.keys()):
            keep_terms.extend(top_terms[inst_enr_score][tmp_num])

    keep_terms = list(set(keep_terms))

    # keep enriched terms that are at the top 10 based on at least one score
    keep_enr = []
    for inst_enr in enr:
        if inst_enr['name'] in keep_terms:
            keep_enr.append(inst_enr)

    # fill in full matrix
    #######################

    # genes
    row_node_names = []
    # enriched terms
    col_node_names = []

    # gather information from the list of enriched terms
    for inst_enr in keep_enr:
        col_node_names.append(inst_enr['name'])
        row_node_names.extend(inst_enr['int_genes'])

    row_node_names = sorted(list(set(row_node_names)))

    net = Network()
    net.dat['nodes']['row'] = row_node_names
    net.dat['nodes']['col'] = col_node_names
    # NOTE(review): scipy.zeros is a removed alias of numpy.zeros in modern
    # scipy - confirm the pinned scipy version before upgrading
    net.dat['mat'] = scipy.zeros([len(row_node_names), len(col_node_names)])

    for inst_enr in keep_enr:
        inst_term = inst_enr['name']
        col_index = col_node_names.index(inst_term)

        # use combined score for full matrix - will not be seen in viz
        tmp_score = scores['combined_score'][inst_term]
        net.dat['node_info']['col']['value'].append(tmp_score)

        # mark gene membership in the term's gene list with a 1
        for inst_gene in inst_enr['int_genes']:
            row_index = row_node_names.index(inst_gene)
            # save association
            net.dat['mat'][row_index, col_index] = 1

    # cluster full matrix
    #############################
    # do not make multiple views
    views = ['']

    # skip actual clustering when there is a single row (nothing to cluster)
    if len(net.dat['nodes']['row']) > 1:
        net.make_clust(dist_type='jaccard', views=views, dendro=False)
    else:
        net.make_clust(dist_type='jaccard', views=views, dendro=False,
                       run_clustering=False)

    # get dataframe from full matrix
    df = net.dat_to_df()

    # build one sub-view per (score type, top-N) combination
    for score_type in score_types:
        for num_terms in num_dict:
            inst_df = deepcopy(df)
            inst_net = deepcopy(Network())

            inst_df['mat'] = inst_df['mat'][top_terms[score_type][num_terms]]

            # load back into net
            inst_net.df_to_dat(inst_df)

            # make views
            if len(net.dat['nodes']['row']) > 1:
                inst_net.make_clust(dist_type='jaccard', views=['N_row_sum'],
                                    dendro=False)
            else:
                inst_net.make_clust(dist_type='jaccard', views=['N_row_sum'],
                                    dendro=False, run_clustering=False)

            inst_views = inst_net.viz['views']

            # add score_type to views
            for inst_view in inst_views:
                inst_view['N_col_sum'] = num_dict[num_terms]
                inst_view['enr_score_type'] = score_type

                # add values to col_nodes and order according to rank
                for inst_col in inst_view['nodes']['col_nodes']:
                    inst_col['rank'] = len(
                        top_terms[score_type]
                        [num_terms]) - top_terms[score_type][num_terms].index(
                            inst_col['name'])
                    inst_name = inst_col['name']
                    inst_col['value'] = scores[score_type][inst_name]

            # add views to main network
            net.viz['views'].extend(inst_views)

    return net
net.load_file('txt/rc_two_cats.txt') # net.load_file('txt/example_tsv.txt') # net.load_file('txt/col_categories.txt') # net.load_file('txt/mat_cats.tsv') # net.load_file('txt/mat_1mb.Txt') # net.load_file('txt/mnist.txt') # net.load_file('txt/sim_mat_4_cats.txt') views = ['N_row_sum','N_row_var'] # # filtering rows and cols by sum # net.filter_sum('row', threshold=20) # net.filter_sum('col', threshold=30) # # keep top rows based on sum # net.filter_N_top('row', 10, 'sum') net.make_clust(dist_type='cos',views=views , dendro=True, sim_mat=True, filter_sim=0.1) # net.produce_view({'N_row_sum':10,'dist':'euclidean'}) net.write_json_to_file('viz', 'json/mult_view.json', 'no-indent') net.write_json_to_file('sim_row', 'json/mult_view_sim_row.json', 'no-indent') net.write_json_to_file('sim_col', 'json/mult_view_sim_col.json', 'no-indent') elapsed_time = time.time() - start_time print('\n\nelapsed time') print(elapsed_time)
def clust_from_response(response_list):
    '''
    Build a clustergrammer network (genes x enriched terms) from an Enrichr
    response list and attach ranked top-N sub-views for each score type.

    NOTE(review): legacy variant - relies on Python-2-era pandas
    (in-place Series.sort) and scipy.zeros; confirm the pinned runtime
    before modifying behavior.
    '''
    from clustergrammer import Network
    import scipy
    import json
    import pandas as pd
    import math
    from copy import deepcopy

    print('----------------------')
    print('enrichr_clust_from_response')
    print('----------------------')

    ini_enr = transfer_to_enr_dict(response_list)

    enr = []
    scores = {}
    score_types = ['combined_score','pval','zscore']

    # one Series per score type, indexed by enriched-term name
    for score_type in score_types:
        scores[score_type] = pd.Series()

    for inst_enr in ini_enr:
        if inst_enr['combined_score'] > 0:
            # make series of enriched terms with scores
            for score_type in score_types:
                # collect the scores of the enriched terms
                if score_type == 'combined_score':
                    scores[score_type][inst_enr['name']] = inst_enr[score_type]
                if score_type == 'pval':
                    # -log(pval): larger means more significant
                    scores[score_type][inst_enr['name']] = -math.log(inst_enr[score_type])
                if score_type == 'zscore':
                    # negate so larger is better, like the other scores
                    scores[score_type][inst_enr['name']] = -inst_enr[score_type]
            # keep enrichement values
            enr.append(inst_enr)

    # sort and normalize the scores
    for score_type in score_types:
        scores[score_type] = scores[score_type]/scores[score_type].max()
        # legacy pandas: Series.sort sorts in place
        scores[score_type].sort(ascending=False)

    number_of_enriched_terms = len(scores['combined_score'])

    enr_score_types = ['combined_score','pval','zscore']

    # choose how many top-N views to build based on how many terms exist
    if number_of_enriched_terms <10:
        num_dict = {'ten':10}
    elif number_of_enriched_terms <20:
        num_dict = {'ten':10, 'twenty':20}
    else:
        num_dict = {'ten':10, 'twenty':20, 'thirty':30}

    # gather lists of top scores
    top_terms = {}
    for enr_type in enr_score_types:
        top_terms[enr_type] = {}
        for num_terms in num_dict.keys():
            inst_num = num_dict[num_terms]
            top_terms[enr_type][num_terms] = scores[enr_type].index.tolist()[: inst_num]

    # gather the terms that should be kept - they are at the top of the score list
    keep_terms = []
    for inst_enr_score in top_terms:
        for tmp_num in num_dict.keys():
            keep_terms.extend( top_terms[inst_enr_score][tmp_num] )

    keep_terms = list(set(keep_terms))

    # keep enriched terms that are at the top 10 based on at least one score
    keep_enr = []
    for inst_enr in enr:
        if inst_enr['name'] in keep_terms:
            keep_enr.append(inst_enr)

    # fill in full matrix
    #######################

    # genes
    row_node_names = []
    # enriched terms
    col_node_names = []

    # gather information from the list of enriched terms
    for inst_enr in keep_enr:
        col_node_names.append(inst_enr['name'])
        row_node_names.extend(inst_enr['int_genes'])

    row_node_names = sorted(list(set(row_node_names)))

    net = Network()
    net.dat['nodes']['row'] = row_node_names
    net.dat['nodes']['col'] = col_node_names
    net.dat['mat'] = scipy.zeros([len(row_node_names),len(col_node_names)])

    for inst_enr in keep_enr:
        inst_term = inst_enr['name']
        col_index = col_node_names.index(inst_term)

        # use combined score for full matrix - will not be seen in viz
        tmp_score = scores['combined_score'][inst_term]
        net.dat['node_info']['col']['value'].append(tmp_score)

        # mark gene membership in the term's gene list with a 1
        for inst_gene in inst_enr['int_genes']:
            row_index = row_node_names.index(inst_gene)
            # save association
            net.dat['mat'][row_index, col_index] = 1

    # cluster full matrix
    #############################
    # do not make multiple views
    views = ['']

    # skip actual clustering when there is a single row (nothing to cluster)
    if len(net.dat['nodes']['row']) > 1:
        net.make_clust(dist_type='jaccard', views=views, dendro=False)
    else:
        net.make_clust(dist_type='jaccard', views=views, dendro=False,
                       run_clustering=False)

    # get dataframe from full matrix
    df = net.dat_to_df()

    # build one sub-view per (score type, top-N) combination
    for score_type in score_types:
        for num_terms in num_dict:
            inst_df = deepcopy(df)
            inst_net = deepcopy(Network())

            inst_df['mat'] = inst_df['mat'][top_terms[score_type][num_terms]]

            # load back into net
            inst_net.df_to_dat(inst_df)

            # make views
            if len(net.dat['nodes']['row']) > 1:
                inst_net.make_clust(dist_type='jaccard', views=['N_row_sum'],
                                    dendro=False)
            else:
                inst_net.make_clust(dist_type='jaccard', views=['N_row_sum'],
                                    dendro=False, run_clustering = False)

            inst_views = inst_net.viz['views']

            # add score_type to views
            for inst_view in inst_views:
                inst_view['N_col_sum'] = num_dict[num_terms]
                inst_view['enr_score_type'] = score_type

                # add values to col_nodes and order according to rank
                for inst_col in inst_view['nodes']['col_nodes']:
                    inst_col['rank'] = len(top_terms[score_type][num_terms]) - top_terms[score_type][num_terms].index(inst_col['name'])
                    inst_name = inst_col['name']
                    inst_col['value'] = scores[score_type][inst_name]

            # add views to main network
            net.viz['views'].extend(inst_views)

    return net
# net.normalize(axis='row', norm_type='qn') # net.normalize(axis='col', norm_type='zscore', keep_orig=True) # net.filter_N_top('row', 100, rank_type='var') # net.filter_N_top('col', 3, rank_type='var') # net.filter_threShold('col', threshold=2, num_occur=3 # net.filter_threshold('row', threshold=3.0, num_occur=4) net.swap_nan_for_zero() # df = net.dat_to_df() views = ['N_row_sum', 'N_row_var'] net.make_clust(dist_type='cos',views=views , dendro=True, sim_mat=True, filter_sim=0.1, calc_cat_pval=False) # run_enrichr=['ChEA_2015']) # run_enrichr=['ENCODE_TF_ChIP-seq_2014']) # run_enrichr=['KEA_2015']) # run_enrichr=['GO_Biological_Process_2015']) net.write_json_to_file('viz', 'json/mult_view.json', 'no-indent') net.write_json_to_file('sim_row', 'json/mult_view_sim_row.json', 'no-indent') net.write_json_to_file('sim_col', 'json/mult_view_sim_col.json', 'no-indent') # net.write_matrix_to_tsv ('txt/export_tmp.txt') elapsed_time = time.time() - start_time print('\n\nelapsed time: '+str(elapsed_time))
def genNetworkFromMatrix(matr):
    '''Build a network from the matrix string `matr` (file contents, not a
    path), skip the actual clustering step, and return the unindented viz
    JSON.'''
    network = Network()
    # network.load_file('txt/example.txt')
    network.load_file_as_string(matr)
    network.make_clust(run_clustering=False, dendro=False, views=[])

    return network.export_net_json('viz', 'no-indent')
# net.load_file('txt/mat_cats.tsv') # net.load_file('txt/mat_1mb.Txt') # net.load_file('txt/mnist.txt') # net.load_file('txt/sim_mat_4_cats.txt') views = ['N_row_sum', 'N_row_var'] # # filtering rows and cols by sum # net.filter_sum('row', threshold=20) # net.filter_sum('col', threshold=30) # # keep top rows based on sum # net.filter_N_top('row', 10, 'sum') net.make_clust(dist_type='cos', views=views, dendro=True, sim_mat=True, filter_sim=0.1) # net.produce_view({'N_row_sum':10,'dist':'euclidean'}) net.write_json_to_file('viz', 'json/mult_view.json', 'no-indent') net.write_json_to_file('sim_row', 'json/mult_view_sim_row.json', 'no-indent') net.write_json_to_file('sim_col', 'json/mult_view_sim_col.json', 'no-indent') elapsed_time = time.time() - start_time print('\n\nelapsed time') print(elapsed_time)
# Example script: cluster txt/rc_two_cats.txt and write the main view plus
# the row/column similarity matrices as JSON; reports wall-clock time.
import time
start_time = time.time()

from clustergrammer import Network
net = Network()

net.load_file('txt/rc_two_cats.txt')
# net.load_file('txt/tmp.txt')

views = ['N_row_sum','N_row_var']

net.make_clust(dist_type='cos', views=views, dendro=True, sim_mat=True)

# write the visualization JSON and both similarity-matrix JSONs
net.write_json_to_file('viz', 'json/mult_view.json')
net.write_json_to_file('sim_row', 'json/mult_view_sim_row.json')
net.write_json_to_file('sim_col', 'json/mult_view_sim_col.json')

elapsed_time = time.time() - start_time
print('\n\nelapsed time')
print(elapsed_time)
#################### inst_name = 'Tyrosine' # net.load_file('txt/phos_ratios_all_treat_no_geld_ST.txt') net.load_file('txt/phos_ratios_all_treat_no_geld_Tyrosine.txt') net.swap_nan_for_zero() # net.normalize(axis='row', norm_type='zscore', keep_orig=True) print(net.dat.keys()) views = ['N_row_sum', 'N_row_var'] net.make_clust(dist_type='cos', views=views, dendro=True, sim_mat=True, filter_sim=0.1, calc_cat_pval=False) # run_enrichr=['KEA_2015']) # run_enrichr=['ENCODE_TF_ChIP-seq_2014']) # run_enrichr=['GO_Biological_Process_2015']) net.write_json_to_file('viz', 'json/' + inst_name + '.json', 'no-indent') net.write_json_to_file('sim_row', 'json/' + inst_name + '_sim_row.json', 'no-indent') net.write_json_to_file('sim_col', 'json/' + inst_name + '_sim_col.json', 'no-indent') elapsed_time = time.time() - start_time print('\n\nelapsed time: ' + str(elapsed_time))