def exp_heatmap_json(request):
    """Django view: cluster the expression table with clustergrammer and
    return the visualization JSON for the front end.

    Returns a JsonResponse of the form ``{'data': <clustergrammer viz dict>}``.
    """
    import pandas as pd
    from clustergrammer import Network

    columns = [i.name for i in Exp._meta.get_fields()]
    exps = Exp.objects.all().using("expDb").values()
    df = pd.DataFrame(list(exps), columns=columns)

    # Index by gene_id and drop it from the data columns BY NAME.
    # (The original assigned df.index = df.gene_id and then sliced
    # df.columns[1:], which silently assumed gene_id was the first column.)
    df = df.set_index('gene_id')

    net = Network()
    net.load_df(df)
    # Z-score normalize the rows
    net.normalize(axis='row', norm_type='zscore', keep_orig=True)
    # filter for the top 100 columns based on their absolute value sum
    net.filter_N_top('col', 100, 'sum')
    # cluster using default parameters
    net.cluster()

    # export visualization JSON and re-parse it so JsonResponse can embed it
    data = json.loads(net.export_net_json('viz'))
    response = {
        'data': data,
    }
    return JsonResponse(response, content_type='application/json')
def get_clusters(analysis, data_types):
    """Build clustergrammer network JSON for each requested omics data type.

    :param analysis: the analysis whose latest data should be clustered
    :param data_types: iterable of data-type constants (GENOMICS, PROTEOMICS,
        METABOLOMICS)
    :return: dict mapping labels ('gene', 'protein', 'compound') to the
        network JSON produced by clustergrammer
    """
    data_type_label = {
        GENOMICS: 'gene',
        PROTEOMICS: 'protein',
        METABOLOMICS: 'compound'
    }
    cluster_json = {}
    for data_type in data_types:
        analysis_data = get_last_analysis_data(analysis, data_type)
        data_df, design_df = get_dataframes(analysis_data, PKS[data_type], SAMPLE_COL)

        # Standardization differs per platform; intensity-based platforms
        # are log-transformed and floored at min_value=5000.
        if data_type == GENOMICS:
            inference = WebOmicsInference(data_df, design_df, data_type)
            df = inference.standardize_df(inference.data_df)
        elif data_type in (PROTEOMICS, METABOLOMICS):
            inference = WebOmicsInference(data_df, design_df, data_type, min_value=5000)
            df = inference.standardize_df(inference.data_df, log=True)
        else:
            # BUG FIX: an unknown data type previously left `df` unbound
            # (NameError) or reused the previous iteration's dataframe.
            continue

        if not df.empty:
            net = Network()
            net.load_df(df)
            net.cluster()
            label = data_type_label[data_type]
            cluster_json[label] = net.export_net_json()
    return cluster_json
def build_layout():
    """Assemble the 'Cluster Comparison' Dash tab with a clustergrammer view."""
    df = load_data.load_gsva_compare_cluster('hallmark')

    # TODO THIS NEEDS TO BE CLEANED UP!!!!!!
    # Assign each cluster id to a cancer type via substring markers in its
    # name. Order matters: 'PJ030' must be checked before the broader 'PJ'.
    markers = [
        ('PJ030', 'LGG'),
        ('PJ', 'GBM'),
        ('LX', 'LUAD'),
        ('GSE146026', 'OV'),
        ('GSE72056', 'SKCM'),
        ('GSE103322', 'HNSC'),
        ('GSE111672', 'PAAD'),
    ]
    cat_to_true = defaultdict(lambda: [])
    for clust in df.index:
        for marker, cancer_type in markers:
            if marker in clust:
                cat_to_true[cancer_type].append(clust)
                break

    cats = [{
        'title': 'Cancer Type',
        'cats': dict(cat_to_true)
    }]

    net = Network()
    net.load_df(df)
    net.add_cats('row', cats)
    net.make_clust()

    layout = dcc.Tab(label='Cluster Comparison', children=[
        dbc.Container(
            fluid=True,
            children=[
                html.Link(rel='stylesheet', href='./static/custom.css'),
                dash_clustergrammer.DashClustergrammer(
                    id='cgram-component',
                    label='',
                    network_data=net.viz)
            ])
    ])
    return layout
def get_clustergrammer_json(self, outfile):
    """Cluster ``self.expression_dataframe`` and write clustergrammer
    visualization JSON to ``outfile``.

    On clustering failure an empty placeholder file is created so that
    downstream consumers still find something at that path.
    """
    from pathlib import Path

    # Create network
    net = Network()
    # Load file
    net.load_df(self.expression_dataframe)

    # Column categories are optional -- ignore failures, but never use a
    # bare `except:` (it would also swallow KeyboardInterrupt/SystemExit).
    try:
        net.add_cats('col', self.sample_cats)
    except Exception:
        pass

    try:
        # calculate clustering using default parameters
        net.cluster()
        # save visualization JSON to file for use by front end
        net.write_json_to_file('viz', outfile)
    except Exception:
        # Create an empty placeholder without shelling out via os.system.
        Path(outfile).touch()
def prepare_clustergrammer_data(self, outfname='clustergrammer_data.json', G=None):
    """For a distance matrix, output a clustergrammer JSON file that
    clustergrammer-js can use.

    For now it loads the clustergrammer-py module from local dev files.
    TODO: once changes are pulled into clustergrammer-py, we can use the
    actual module (pip).

    :outfname: filename for the output json
    :G: networkx graph (use self.G_sym by default)
    """
    # BUG FIX: the caller-supplied G was previously overwritten
    # unconditionally by `G = self.G_sym or self.G`; only fall back to the
    # instance graphs when no graph was passed in.
    if G is None:
        G = self.G_sym or self.G

    # dev-path hack until clustergrammer-py changes land upstream
    clustergrammer_py_dev_dir = '../clustergrammer/clustergrammer-py/'
    sys.path.insert(0, clustergrammer_py_dev_dir)
    from clustergrammer import Network as ClustergrammerNetwork

    start = timer()
    # NOTE(review): nx.to_numpy_matrix is deprecated/removed in newer
    # networkx releases -- confirm the pinned version before upgrading.
    d = nx.to_numpy_matrix(G)
    df = pd.DataFrame(d, index=G.nodes(), columns=G.nodes())

    net = ClustergrammerNetwork()
    net.load_df(df)
    # distances are precalculated (the matrix IS the distance matrix)
    net.cluster(dist_type='precalculated')
    logger.debug("done loading and clustering. took {}".format(
        format_timespan(timer() - start)))

    logger.debug("writing to {}".format(outfname))
    start = timer()
    net.write_json_to_file('viz', outfname)
    logger.debug("done writing file {}. took {}".format(
        outfname, format_timespan(timer() - start)))
# Prefix row/column labels with clustergrammer category tuples: each label
# becomes (name, 'Cat: value', ...) so the front end renders category bars.
new_rows = [(x, 'Cat-1: A', 'Cat-2: B') for x in df.index]
new_cols = [(x, 'Cell Type: Unknown') for x in df.columns]
# Earlier experiments with per-column value categories, kept for reference:
# new_cols = []
# inst_val = -round(df.shape[1]/2)
# for inst_col in df.columns.tolist():
#     inst_val = inst_val + 1
#     new_col = (inst_col, 'Cat: C-' + str(inst_val), 'Val: ' + str(inst_val))
#     new_cols.append(new_col)
# new_cols = [(x, 'Cat-1: A', 'Cat-2: B') for x in df.columns]
# new_cols = [(x, 'Cat-1: A', 'Cat-2: B', 'Cat-3: C') for x in df.columns]
df.index = new_rows
df.columns = new_cols

net.load_df(df)
# Cosine distance with row-sum / row-variance filtered views;
# enrichrgram=True asks clustergrammer to attach Enrichr enrichment data.
net.cluster(dist_type='cos', views=['N_row_sum', 'N_row_var'],
            dendro=True, sim_mat=False, filter_sim=0.1,
            calc_cat_pval=False, enrichrgram=True)

# write jsons for front-end visualizations
net.write_json_to_file('viz', 'data/big_data/custom.json', 'no-indent')
# net.write_json_to_file('sim_row', 'json/mult_view_sim_row.json', 'no-indent')
# net.write_json_to_file('sim_col', 'json/mult_view_sim_col.json', 'no-indent')
# make some plots with the combined data
for item in final_LassoLD:
    swarms('all', item, featMatAll, directoryA, '.tif')
    plt.close()

#%%Junk from here on
# try clustergrammer
from clustergrammer import Network
net = Network()
net.load_df(cloz_cluster)
# filter for the top 16 columns based on their absolute value sum
# (comment previously said 100, but the code filters 16)
net.filter_N_top('col', 16, 'sum')
# cluster using default parameters
net.cluster()

# leg_patch = mpatches.Patch(color=cmap, label=[lut.keys()])
# plt.legend(handles=[leg_patch])
# NOTE(review): plt.legend expects artist handles as its first argument;
# passing [cmap] here looks suspect -- confirm this renders as intended.
plt.legend([cmap], lut.keys())
plt.show()

# BUG FIX: removed a stray bare `x` token that made this cell a syntax error.
swarms(rep1, feature, features_df, directory, file_type)