def get_clusters(analysis, data_types):
    """Cluster the most recent analysis data of every requested data type.

    For each entry of ``data_types`` the latest analysis data is fetched,
    converted to data/design dataframes, standardized through
    ``WebOmicsInference`` and, when the standardized frame is not empty,
    clustered with a clustergrammer ``Network``.

    :param analysis: analysis object handed to ``get_last_analysis_data``
    :param data_types: iterable of data type constants to process
    :return: dict mapping 'gene' / 'protein' / 'compound' to the value
        returned by ``Network.export_net_json()`` for that data type; data
        types whose standardized dataframe is empty are left out
    """
    cluster_json = {}
    for data_type in data_types:
        latest_data = get_last_analysis_data(analysis, data_type)
        data_df, design_df = get_dataframes(latest_data, PKS[data_type], SAMPLE_COL)

        # NOTE(review): a data type matching neither branch leaves ``df``
        # unbound (or carrying the previous iteration's value) -- confirm
        # that callers only pass the three supported types.
        if data_type == GENOMICS:
            inference = WebOmicsInference(data_df, design_df, data_type)
            df = inference.standardize_df(inference.data_df)
        elif data_type in (PROTEOMICS, METABOLOMICS):
            inference = WebOmicsInference(data_df, design_df, data_type, min_value=5000)
            df = inference.standardize_df(inference.data_df, log=True)

        if df.empty:
            continue

        net = Network()
        net.load_df(df)
        net.cluster()

        # key under which this data type is reported to the caller
        labels_by_type = {
            GENOMICS: 'gene',
            PROTEOMICS: 'protein',
            METABOLOMICS: 'compound',
        }
        label = labels_by_type[data_type]
        cluster_json[label] = net.export_net_json()
    return cluster_json
def exp_heatmap_json(request):
    """Return the clustered expression heatmap as a JSON response.

    Loads every ``Exp`` row from the "expDb" database into a dataframe
    indexed by ``gene_id``, drops the first model column, z-score normalizes
    the rows, keeps the top 100 columns by sum, clusters the matrix and
    returns the clustergrammer 'viz' JSON under the ``data`` key.

    :param request: the incoming request (not read by this view)
    :return: ``JsonResponse`` of the form ``{'data': <viz json>}``
    """
    import pandas as pd
    from clustergrammer import Network

    field_names = [field.name for field in Exp._meta.get_fields()]
    records = list(Exp.objects.all().using("expDb").values())

    frame = pd.DataFrame(records, columns=field_names)
    frame.index = frame.gene_id
    # everything except the first model field becomes the matrix
    frame = frame.loc[:, frame.columns[1:]]

    net = Network()
    net.load_df(frame)
    # Z-score normalize the rows
    net.normalize(axis='row', norm_type='zscore', keep_orig=True)
    # keep the top 100 columns ranked by their sum
    net.filter_N_top('col', 100, 'sum')
    # cluster using default parameters
    net.cluster()

    # the exported visualization is a JSON string; decode it so that it is
    # embedded as an object instead of a doubly-encoded string
    viz = json.loads(net.export_net_json('viz'))
    return JsonResponse({'data': viz}, content_type='application/json')
def prepare_heatmap(matrix_input, html_file, html_dir, tools_dir, categories, distance, linkage):
    """Render the clustergrammer HTML page and write its visualization JSON.

    :param matrix_input: path of the matrix file loaded into the network
    :param html_file: path the rendered template is written to
    :param html_dir: directory created for the JSON output
    :param tools_dir: directory containing the ``templates`` folder
    :param categories: mapping with 'row' and 'col' entries; a truthy entry
        is passed to ``Network.add_cats`` for that axis
    :param distance: value forwarded as ``dist_type`` to ``Network.cluster``
    :param linkage: value forwarded as ``linkage_type`` to ``Network.cluster``
    """
    # prepare directory and html
    os.mkdir(html_dir)
    jinja_env = Environment(loader=FileSystemLoader(tools_dir + "/templates"))
    page = jinja_env.get_template("clustergrammer.template").render()
    with open(html_file, "w") as handle:
        handle.write(page)

    # cluster the matrix, attaching categories only where they were supplied
    net = Network()
    net.load_file(matrix_input)
    for axis in ('row', 'col'):
        if categories[axis]:
            net.add_cats(axis, categories[axis])
    net.cluster(dist_type=distance, linkage_type=linkage)
    net.write_json_to_file('viz', html_dir + "/mult_view.json")
def get_clustergrammer_json(self, outfile):
    """Write the clustergrammer visualization JSON for the expression data.

    Loads ``self.expression_dataframe`` into a clustergrammer ``Network``,
    attaches ``self.sample_cats`` as column categories when that succeeds,
    clusters with default parameters and writes the 'viz' JSON to
    ``outfile``. If clustering or writing fails, ``outfile`` is touched
    instead, so the path exists afterwards either way.

    :param outfile: path of the JSON file to write
    """
    # Create network and load the expression matrix
    net = Network()
    net.load_df(self.expression_dataframe)

    # Column categories are optional: a failure here must not stop the
    # export. ``Exception`` (not a bare except) lets KeyboardInterrupt and
    # SystemExit propagate.
    try:
        net.add_cats('col', self.sample_cats)
    except Exception:
        pass

    try:
        # calculate clustering using default parameters
        net.cluster()
        # save visualization JSON to file for use by front end
        net.write_json_to_file('viz', outfile)
    except Exception:
        # Best-effort fallback equivalent to `touch outfile`, done without
        # a shell so paths containing spaces or shell metacharacters are
        # handled literally. Existing content is kept.
        try:
            with open(outfile, 'a'):
                os.utime(outfile, None)
        except (IOError, OSError):
            # the shell `touch` failed silently as well; stay best-effort
            pass
def prepare_clustergrammer_data(self, outfname='clustergrammer_data.json', G=None):
    """Write a clustergrammer JSON file for a distance matrix.

    The adjacency matrix of the graph is loaded into a clustergrammer
    network, clustered with ``dist_type='precalculated'`` and written as the
    'viz' JSON that clustergrammer-js can use.

    For now the clustergrammer-py module is loaded from local dev files.
    TODO: once changes are pulled into clustergrammer-py, we can use the
    actual module (pip)

    :outfname: filename for the output json
    :G: networkx graph (``self.G_sym or self.G`` is used when omitted)
    """
    # Honor an explicitly supplied graph; only fall back to the instance
    # graphs when the caller did not pass one.
    if G is None:
        G = self.G_sym or self.G

    clustergrammer_py_dev_dir = '../clustergrammer/clustergrammer-py/'
    # avoid growing sys.path by one entry on every call
    if clustergrammer_py_dev_dir not in sys.path:
        sys.path.insert(0, clustergrammer_py_dev_dir)
    from clustergrammer import Network as ClustergrammerNetwork

    start = timer()
    d = nx.to_numpy_matrix(G)
    df = pd.DataFrame(d, index=G.nodes(), columns=G.nodes())
    net = ClustergrammerNetwork()
    net.load_df(df)
    net.cluster(dist_type='precalculated')
    logger.debug("done loading and clustering. took {}".format(
        format_timespan(timer() - start)))

    logger.debug("writing to {}".format(outfname))
    start = timer()
    net.write_json_to_file('viz', outfname)
    logger.debug("done writing file {}. took {}".format(
        outfname, format_timespan(timer() - start)))
# Cluster the matrix in 'mult_view.tsv' and export the visualization JSON.
# Needs pandas and sklearn as well:
#   pip install --user --upgrade clustergrammer pandas sklearn
from clustergrammer import Network

matrix_path = 'mult_view.tsv'
viz_json_path = 'mult_view.json'

# make network object and load file
net = Network()
net.load_file(matrix_path)

# optional: Z-score normalize the rows
# net.normalize(axis='row', norm_type='zscore', keep_orig=True)

# cluster with default parameters, then save the JSON used by the front end
net.cluster()
net.write_json_to_file('viz', viz_json_path)
'''
Python 2.7

The clustergrammer python module can be installed using pip:

    pip install clustergrammer

or by getting the code from the repo:

    https://github.com/MaayanLab/clustergrammer-py
'''
from clustergrammer import Network

net = Network()

# load matrix tsv file
net.load_file('txt/heatmap_features.txt')

# colors of the first row category, applied in this order
feature_type_colors = (
    ('Feature Type: Interactivity', 'yellow'),
    ('Feature Type: Sharing', 'blue'),
    ('Feature Type: Usability', 'orange'),
    ('Feature Type: Biology-Specific', 'red'),
)
for cat_name, cat_color in feature_type_colors:
    net.set_cat_color('row', 1, cat_name, cat_color)

net.cluster(
    dist_type='cos',
    views=[],
    dendro=True,
    filter_sim=0.1,
    calc_cat_pval=False,
    enrichrgram=False,
)

# write jsons for front-end visualizations
net.write_json_to_file('viz', 'json/mult_view.json', 'indent')
# for inst_col in df.columns.tolist(): # inst_val = inst_val + 1 # new_col = (inst_col, 'Cat: C-' + str(inst_val), 'Val: ' + str(inst_val)) # new_cols.append(new_col) # new_cols = [(x, 'Cat-1: A', 'Cat-2: B') for x in df.columns] # new_cols = [(x, 'Cat-1: A', 'Cat-2: B', 'Cat-3: C') for x in df.columns] df.index = new_rows df.columns = new_cols net.load_df(df) net.cluster(dist_type='cos', views=['N_row_sum', 'N_row_var'], dendro=True, sim_mat=False, filter_sim=0.1, calc_cat_pval=False, enrichrgram=True) # write jsons for front-end visualizations net.write_json_to_file('viz', 'data/big_data/custom.json', 'no-indent') # net.write_json_to_file('sim_row', 'json/mult_view_sim_row.json', 'no-indent') # net.write_json_to_file('sim_col', 'json/mult_view_sim_col.json', 'no-indent') # net.normalize(axis='row', norm_type='zscore') net.cluster(dist_type='cos', views=['N_row_sum', 'N_row_var'], dendro=True,
# Sample identifiers: the part of every delta_f column name before the first '|'.
ids = delta_f.columns.map(lambda x: x.split('|')[0])

matrix_path = "%s_heatmap_matrix.txt" % args.d

# Column category labels. Samples missing from ann_dict fall back to 'NA';
# an annotation that already is 'NA' formats to the same text, so no
# separate branch is needed.
cls = ["Cell Line: %s" % ann_dict.get(i, ['NA'])[0] for i in ids]
ts = ["Tissue: %s" % ann_dict.get(i, ['NA', 'NA'])[1] for i in ids]

# Write the matrix file: three header lines (column names, cell-line
# categories, tissue categories), each preceded by two empty cells for the
# two row-label columns, then one line per gene. The context manager closes
# the file even if a write fails.
with open(matrix_path, 'w') as fout:
    fout.write("\t\t%s\n" % ('\t'.join(tfs)))
    fout.write("\t\t%s\n" % ('\t'.join(cls)))
    fout.write("\t\t%s\n" % ('\t'.join(ts)))
    for i in range(status.shape[0]):
        fout.write('%s\t%s\t%s\n' % (
            "Gene: %s" % genes[i],
            "Input Gene: %s" % status[i],
            '\t'.join(delta_f.iloc[i, :].map(str))))

# Cluster the matrix that was just written and export the visualization JSON.
net.load_file(matrix_path)
net.cluster()
net.write_json_to_file('viz', '%s_mult_view.json' % args.d)
"#CC0744", "#C0B9B2", "#C2FF99", "#001E09", "#00489C", "#6F0062", "#0CBD66", "#EEC3FF", "#456D75", "#B77B68", "#7A87A1", "#788D66", "#885578", "#0089A3", "#FF8A9A", "#D157A0", "#BEC459", "#456648", "#0086ED", "#886F4C", "#34362D", "#B4A8BD", "#00A6AA", "#452C2C", "#636375", "#A3C8C9", "#FF913F", "#938A81", "#575329", "#00FECF", "#B05B6F", "#8CD0FF", "#3B9700", "#04F757", "#C8A1A1", "#1E6E00", "#7900D7", "#A77500", "#6367A9", "#A05837", "#6B002C", "#772600", "#D790FF", "#9B9700", "#549E79", "#FFF69F", "#201625", "#CB7E98", "#72418F", "#BC23FF", "#99ADC0", "#3A2465", "#922329", "#5B4534", "#FDE8DC", "#404E55", "#FAD09F", "#A4E804", "#f58231", "#324E72", "#402334" ] for i in range(len(color_array3)): label = 'SC3 label: _' + str(i) + '_' net.set_cat_color(axis='col', cat_index=1, cat_name=label, inst_color=color_array3[i]) #console.log(color_array[i]); if use_user_label == '1': for j in range(len(unique_array)): userlabel = 'User\'s label: _' + str(unique_array[j]) + '_' net.set_cat_color(axis='col', cat_index=2, cat_name=userlabel, inst_color=color_array3[63 - j]) net.cluster(dist_type='cos', enrichrgram=True, run_clustering=False) # write jsons for front-end visualizations out = wd + 'json/' + outname + '.json' net.write_json_to_file('viz', out, 'indent')
# input matrix; the commented lines are alternative example inputs
net.load_file('txt/rc_two_cats.txt')
# net.load_file('txt/ccle_example.txt')
# net.load_file('txt/rc_val_cats.txt')
# net.load_file('txt/number_labels.txt')
# net.load_file('txt/mnist.txt')
# net.load_file('txt/tuple_cats.txt')
# net.load_file('txt/example_tsv.txt')

# net.enrichrgram('KEA_2015')

# optional filtering and normalization
##########################################
# net.filter_sum('row', threshold=20)
# net.normalize(axis='col', norm_type='zscore', keep_orig=True)
# net.filter_N_top('row', 250, rank_type='sum')
# net.filter_threshold('row', threshold=3.0, num_occur=4)
# net.swap_nan_for_zero()
# net.set_cat_color('col', 1, 'Category: one', 'blue')

# net.make_clust()
# net.dendro_cats('row', 5)

cluster_options = {
    'dist_type': 'cos',
    'views': ['N_row_sum', 'N_row_var'],
    'dendro': True,
    'sim_mat': True,
    'filter_sim': 0.1,
    'calc_cat_pval': False,
    'enrichrgram': False,
    'run_clustering': True,
}
net.cluster(**cluster_options)

# write jsons for front-end visualizations: (network type, path, indent mode)
json_outputs = (
    ('viz', 'json/mult_view.json', 'indent'),
    ('sim_row', 'json/mult_view_sim_row.json', 'no-indent'),
    ('sim_col', 'json/mult_view_sim_col.json', 'no-indent'),
)
for net_type, json_path, indent_mode in json_outputs:
    net.write_json_to_file(net_type, json_path, indent_mode)
from clustergrammer import Network

# Script entry point: cluster a matrix file and write the clustergrammer
# visualization JSON next to it, then start assembling the HTML page.
# NOTE(review): `sys` is not imported in the visible part of the file --
# presumably it is imported above; confirm.
if __name__ == "__main__":
    # argv[1]: matrix file to cluster
    # argv[2]: HTML output file name (not used in the visible part of the script)
    matrix_filename = sys.argv[1]
    html_output_filename = sys.argv[2]

    print('loading file...')
    net = Network()
    # load matrix file
    net.load_file(matrix_filename)
    print('done')

    # cluster with Jaccard distance and complete linkage
    print('clustering the matrix...')
    net.cluster(dist_type='jaccard', linkage_type='complete')
    # net.cluster(run_clustering=False)
    print('done')

    # save visualization JSON to file for use by front end;
    # the JSON is written to "<matrix_filename>.json"
    print('saving results in json file...')
    json_filename = matrix_filename + '.json'
    net.write_json_to_file('viz', json_filename)
    print('done')

    # creating the html page: read the JSON that was just written back
    # into a single string
    print('creating the html page...')
    network_data = ''
    # NOTE(review): `file` is not closed in the visible part of the script --
    # confirm it is closed further down.
    file = open(json_filename, 'rt')
    for line in file:
        network_data += line
The clustergrammer python module can be installed using pip: pip install clustergrammer or by getting the code from the repo: https://github.com/MaayanLab/clustergrammer-py ''' from clustergrammer import Network net = Network() # load matrix tsv file net.load_file('../data_mats/df_predict_merge.txt') net.set_cat_color('row', 1, 'virus: chik', 'blue') net.set_cat_color('row', 1, 'virus: zika', 'red') net.cluster(enrichrgram=False) # transfer colors from original to predicted categories ######################################################## # make category colors the same for Chik groups for inst_cat in net.viz['cat_colors']['row']['cat-1']: new_cat = inst_cat.replace('original', 'predict') inst_color = net.viz['cat_colors']['row']['cat-1'][inst_cat] net.set_cat_color('row', 3, new_cat, inst_color) net.cluster(enrichrgram=False) # write jsons for front-end visualizations net.write_json_to_file('viz', 'json/mult_view.json', 'indent') # net.write_json_to_file('sim_row', 'json/mult_view_sim_row.json', 'no-indent')
def clust_from_response(response_list):
    """Build a clustered gene-by-term network from a list of enrichment results.

    The response is converted with ``transfer_to_enr_dict``. Terms with a
    positive combined score are scored three ways (combined score,
    ``-log(pval)`` and negated z-score), each score series is normalized by
    its maximum and sorted in descending order. The terms that rank in the
    top 10/20/30 of at least one score form the columns of a binary
    gene-by-term matrix, which is clustered with Jaccard distance. For every
    (score type, top-N) combination an additional view restricted to those
    terms is computed and appended to ``net.viz['views']``.

    :param response_list: raw enrichment response handed to
        ``transfer_to_enr_dict``
    :return: the clustergrammer ``Network`` holding the full matrix and all
        additional views
    :raises ValueError: from ``math.log`` when a kept term has a p-value of 0
    """
    from clustergrammer import Network
    import math
    from copy import deepcopy

    import numpy as np
    import pandas as pd

    ini_enr = transfer_to_enr_dict(response_list)

    score_types = ['combined_score', 'pval', 'zscore']

    # collect the scores of the enriched terms, keyed by term name
    raw_scores = {score_type: {} for score_type in score_types}
    enr = []
    for inst_enr in ini_enr:
        if inst_enr['combined_score'] > 0:
            inst_name = inst_enr['name']
            raw_scores['combined_score'][inst_name] = inst_enr['combined_score']
            # transform so that larger always means "more enriched"
            raw_scores['pval'][inst_name] = -math.log(inst_enr['pval'])
            raw_scores['zscore'][inst_name] = -inst_enr['zscore']
            # keep enrichment values
            enr.append(inst_enr)

    # normalize and sort the scores; sort_values returns a new Series, so the
    # result has to be stored for the top-N selection below to be by score
    scores = {}
    for score_type in score_types:
        inst_scores = pd.Series(raw_scores[score_type], dtype=float)
        inst_scores = inst_scores / inst_scores.max()
        scores[score_type] = inst_scores.sort_values(ascending=False)

    number_of_enriched_terms = len(scores['combined_score'])

    # only offer the top-N cutoffs that the number of terms can support
    if number_of_enriched_terms < 10:
        num_dict = {'ten': 10}
    elif number_of_enriched_terms < 20:
        num_dict = {'ten': 10, 'twenty': 20}
    else:
        num_dict = {'ten': 10, 'twenty': 20, 'thirty': 30}

    # gather lists of top scores
    top_terms = {}
    for score_type in score_types:
        ranked_names = scores[score_type].index.tolist()
        top_terms[score_type] = {}
        for num_terms, inst_num in num_dict.items():
            top_terms[score_type][num_terms] = ranked_names[:inst_num]

    # gather the terms that should be kept - they are at the top of the
    # score list for at least one score
    keep_terms = set()
    for score_type in top_terms:
        for num_terms in num_dict:
            keep_terms.update(top_terms[score_type][num_terms])

    keep_enr = [inst_enr for inst_enr in enr if inst_enr['name'] in keep_terms]

    # fill in full matrix
    #######################
    # enriched terms (columns) and their intersecting genes (rows)
    col_node_names = []
    row_node_names = []
    for inst_enr in keep_enr:
        col_node_names.append(inst_enr['name'])
        row_node_names.extend(inst_enr['int_genes'])
    row_node_names = sorted(set(row_node_names))

    # constant-time gene -> row lookup instead of list.index per gene
    row_index_of = {gene: index for index, gene in enumerate(row_node_names)}

    net = Network()
    net.dat['nodes']['row'] = row_node_names
    net.dat['nodes']['col'] = col_node_names
    net.dat['mat'] = np.zeros([len(row_node_names), len(col_node_names)])

    for inst_enr in keep_enr:
        inst_term = inst_enr['name']
        col_index = col_node_names.index(inst_term)

        # use combined score for full matrix - will not be seen in viz
        net.dat['node_info']['col']['value'].append(
            scores['combined_score'][inst_term])

        for inst_gene in inst_enr['int_genes']:
            # save association
            net.dat['mat'][row_index_of[inst_gene], col_index] = 1

    def clustering_kwargs():
        # a single row cannot be clustered; skip the clustering step then
        if len(net.dat['nodes']['row']) > 1:
            return {}
        return {'run_clustering': False}

    # cluster full matrix
    #############################
    # do not make multiple views
    views = ['']
    net.cluster(dist_type='jaccard', views=views, dendro=False,
                **clustering_kwargs())

    # get dataframe from full matrix
    df = net.dat_to_df()

    for score_type in score_types:
        for num_terms in num_dict:
            inst_terms = top_terms[score_type][num_terms]

            # restrict a copy of the full matrix to the top terms and load
            # it into a fresh network
            inst_df = deepcopy(df)[inst_terms]
            inst_net = Network()
            inst_net.df_to_dat(inst_df)

            # make views
            inst_net.cluster(dist_type='jaccard', views=['N_row_sum'],
                             dendro=False, **clustering_kwargs())
            inst_views = inst_net.viz['views']

            # add score_type to views
            for inst_view in inst_views:
                inst_view['N_col_sum'] = num_dict[num_terms]
                inst_view['enr_score_type'] = score_type

                # add values to col_nodes and order according to rank
                # (the best term gets the highest rank)
                for inst_col in inst_view['nodes']['col_nodes']:
                    inst_name = inst_col['name']
                    inst_col['rank'] = len(inst_terms) - inst_terms.index(inst_name)
                    inst_col['value'] = scores[score_type][inst_name]

            # add views to main network
            net.viz['views'].extend(inst_views)

    return net