def execute_heatmap(self, analysis_id_I,simulation_ids_I=[],simulation_dateAndTimes_I=[],
            flux_units_I=[],rxn_ids_I=[],
            row_pdist_metric_I='euclidean',row_linkage_method_I='complete',
            col_pdist_metric_I='euclidean',col_linkage_method_I='complete',
            observable_only_I = False,
            order_rxnBySim_I = True,
            order_simulation_ids_I=False,
            order_rxn_ids_I=False,
            rxn_id_reverse_I=[]):
    '''Execute hierarchical cluster on row and column data
    and write the resulting heatmap and dendrograms (one set per flux unit)

    INPUT:
    analysis_id_I = string, analysis id
    simulation_ids_I = list of simulation_ids
    simulation_dateAndTimes_I = list of simulation_dateAndTimes, indexed in
        parallel with simulation_ids_I; when either list is empty both are
        looked up from analysis_id_I instead
    flux_units_I = list of flux units (looked up per simulation when empty)
    rxn_ids_I = list of rxn_ids (looked up per simulation/flux unit when empty)
    row_pdist_metric_I, row_linkage_method_I,
    col_pdist_metric_I, col_linkage_method_I = forwarded unchanged to
        calculate_heatmap.make_heatmap
    observable_only_I = include only observable reactions
    order_rxnBySim_I = if True, rows will represent the fluxes and columns will represent the simulations
                       if False, rows will represent the simulations and columns will represent the fluxes
    order_simulation_ids_I = if True, order of the simulation_ids will be kept
        (NOTE: accepted for interface compatibility; not read by this method)
    order_rxn_ids_I = if True, order of the rxn_ids will be kept
        (NOTE: accepted for interface compatibility; not read by this method)
    rxn_id_reverse_I = list of rxn_ids to reverse the flux direction

    OUTPUT:
    None; rows are written with self.add_rows_table to
    'data_stage02_isotopomer_heatmap' and 'data_stage02_isotopomer_dendrogram'

    Assumptions:
    all simulation_ids must be unique
        (i.e., 1 simulation but 2 simulation_dateAndTimes will break the algorithm)
    all simulation_ids must have the same flux units
        (i.e., the per-flux-unit containers are only created while the first
        simulation is processed, and only reactions seen for the first
        simulation are carried forward)
    '''

    calculateheatmap = calculate_heatmap()

    # resolve the simulation_ids and their simulation_dateAndTimes
    if simulation_ids_I and simulation_dateAndTimes_I:
        simulation_ids = simulation_ids_I
        simulation_dateAndTimes = simulation_dateAndTimes_I
    else:
        simulation_ids = []
        simulation_dateAndTimes = []
        simulation_ids_unique = self.get_simulationID_analysisID_dataStage02IsotopomerAnalysis(analysis_id_I)
        for simulation_id in simulation_ids_unique:
            simulation_dateAndTimes_tmp = self.get_simulationDateAndTimes_simulationID_dataStage02IsotopomerfittedNetFluxes(simulation_id)
            # repeat the id once per dateAndTime so the two lists stay parallel
            simulation_dateAndTimes.extend(simulation_dateAndTimes_tmp)
            simulation_ids.extend([simulation_id for _ in simulation_dateAndTimes_tmp])

    ## Pass 1: get all the data
    data_O = {}                   # {flux_units: {rxn_id: [row, ...]}}
    rxn_ids_all = []
    unobservable_fu_rxn_ids = {}  # {flux_units: set(rxn_id)}
    for simulation_cnt_1, simulation_id_1 in enumerate(simulation_ids):
        simulation_dateAndTime_1 = simulation_dateAndTimes[simulation_cnt_1]
        # get the units
        if flux_units_I:
            flux_units = flux_units_I
        else:
            flux_units = self.get_fluxUnits_simulationIDAndSimulationDateAndTime_dataStage02IsotopomerfittedNetFluxes(simulation_id_1,simulation_dateAndTime_1)
        for fu in flux_units:
            # initialize only on first iteration
            if simulation_cnt_1 == 0:
                data_O[fu] = {}
                unobservable_fu_rxn_ids[fu] = set()
            # get the rxn_ids
            if rxn_ids_I:
                rxn_ids = rxn_ids_I
            else:
                rxn_ids = self.get_rxnIDs_simulationIDAndSimulationDateAndTimeAndFluxUnits_dataStage02IsotopomerfittedNetFluxes(simulation_id_1,simulation_dateAndTime_1,fu)
            for rxn_id in rxn_ids:
                if simulation_cnt_1 == 0:
                    data_O[fu][rxn_id] = []
                elif rxn_id not in data_O[fu]:
                    # reactions absent from the first simulation are ignored
                    continue
                rxn_ids_all.append(rxn_id)
                # get the fluxes
                row = self.get_row_simulationIDAndSimulationDateAndTimeAndFluxUnitsAndRxnID_dataStage02IsotopomerfittedNetFluxes(simulation_id_1,simulation_dateAndTime_1,fu,rxn_id)
                if not row:
                    continue
                # change the direction of specified fluxes
                if row['rxn_id'] in rxn_id_reverse_I:
                    row['flux'] = -row['flux']
                    # negating the interval [lb, ub] gives [-ub, -lb]: the
                    # bounds must swap as well as change sign, otherwise lb > ub
                    row['flux_lb'], row['flux_ub'] = -row['flux_ub'], -row['flux_lb']
                if observable_only_I and not mfamethods.check_observableNetFlux(row['flux'],row['flux_lb'],row['flux_ub']):
                    # a reaction that is unobservable in any simulation is
                    # dropped for this flux unit in Pass 2
                    unobservable_fu_rxn_ids[fu].add(rxn_id)
                else:
                    data_O[fu][rxn_id].append(row)

    ## Pass 2: data integrity check
    # keep only reactions that have exactly one row for every simulation
    rxn_ids_unique = sorted(set(rxn_ids_all))
    data_heatmap = {}
    for fu in data_O:
        data_heatmap[fu] = {}
        for rxn_id in rxn_ids_unique:
            if rxn_id in unobservable_fu_rxn_ids[fu]:
                continue
            # rxn_ids_unique pools ids over every flux unit, so a given id
            # may be missing for this particular flux unit
            rows_fu_rxn = data_O[fu].get(rxn_id, [])
            data_tmp = []
            for simulation_cnt_1, simulation_id_1 in enumerate(simulation_ids):
                for d in rows_fu_rxn:
                    if d['simulation_id'] == simulation_id_1 and d['simulation_dateAndTime'] == simulation_dateAndTimes[simulation_cnt_1]:
                        data_tmp.append(d)
                        break
            # check that the length matches
            if len(data_tmp) == len(simulation_ids):
                data_heatmap[fu][rxn_id] = data_tmp

    ## Pass 3: generate the heatmap for each flux_unit
    heatmap_O = []
    dendrogram_col_O = []
    dendrogram_row_O = []
    if order_rxnBySim_I:
        row_label, col_label = 'rxn_id', 'simulation_id'
        row_template, col_template = rxn_ids_I, simulation_ids_I
    else:
        row_label, col_label = 'simulation_id', 'rxn_id'
        row_template, col_template = simulation_ids_I, rxn_ids_I
    for fu, rxn_data in data_heatmap.items():
        # extract out the data {rxn_id:[{},...],...} -> [{},...]
        data2 = [d for rows in rxn_data.values() for d in rows]
        # generate the clustering for the heatmap
        heatmap_1,dendrogram_col_1,dendrogram_row_1 = calculateheatmap.make_heatmap(
            data2, row_label, col_label, 'flux',
            row_pdist_metric_I=row_pdist_metric_I,row_linkage_method_I=row_linkage_method_I,
            col_pdist_metric_I=col_pdist_metric_I,col_linkage_method_I=col_linkage_method_I,
            filter_rows_I=row_template,
            filter_columns_I=col_template,
            order_rowsFromTemplate_I=row_template,
            order_columnsFromTemplate_I=col_template)
        # add data to the database for the heatmap
        for d in heatmap_1:
            d['analysis_id'] = analysis_id_I
            d['value_units'] = fu
            d['used_'] = True
            d['comment_'] = None
            heatmap_O.append(d)
        # add data to the database for the dendrograms
        for dendrogram in (dendrogram_col_1, dendrogram_row_1):
            dendrogram['analysis_id'] = analysis_id_I
            dendrogram['value_units'] = fu
            dendrogram['used_'] = True
            dendrogram['comment_'] = None
        dendrogram_col_O.append(dendrogram_col_1)
        dendrogram_row_O.append(dendrogram_row_1)

    self.add_rows_table('data_stage02_isotopomer_heatmap',heatmap_O)
    self.add_rows_table('data_stage02_isotopomer_dendrogram',dendrogram_col_O)
    self.add_rows_table('data_stage02_isotopomer_dendrogram',dendrogram_row_O)
def execute_heatmap_lineage(self, analysis_id_I,
            row_pdist_metric_I='euclidean',row_linkage_method_I='complete',
            col_pdist_metric_I='euclidean',col_linkage_method_I='complete',
            mutation_id_exclusion_list = []):
    '''Execute hierarchical cluster on row and column data
    built from the mutation frequencies of every lineage in an analysis

    INPUT:
    analysis_id_I = string, analysis id
    row_pdist_metric_I, row_linkage_method_I,
    col_pdist_metric_I, col_linkage_method_I = forwarded unchanged to
        calculate_heatmap.heatmap
    mutation_id_exclusion_list = list of mutation ids to leave out of the matrix

    OUTPUT:
    None; one data_stage01_resequencing_heatmap row per heatmap cell and two
    data_stage01_resequencing_dendrogram rows (column, then row) are added to
    self.session and committed. Nothing is written when there is no data.
    '''

    calculateheatmap = calculate_heatmap()

    print('executing heatmap...')
    # get the analysis information
    experiment_ids,lineage_names = self.get_experimentIDAndLineageName_analysisID_dataStage01ResequencingAnalysis(analysis_id_I)
    # partition into variables:
    intermediates_lineage = []      # one list of intermediates per lineage
    mutation_data_lineage_all = []
    rows_lineage = []
    cnt_sample_names = 0
    for lineage_name_cnt, lineage_name in enumerate(lineage_names):
        experiment_id = experiment_ids[lineage_name_cnt]
        # get ALL intermediates by experiment_id and lineage name
        intermediates = self.get_intermediates_experimentIDAndLineageName_dataStage01ResequencingLineage(experiment_id,lineage_name)
        intermediates_lineage.append(intermediates)
        cnt_sample_names += len(intermediates)
        # get ALL mutation data by experiment_id and lineage name
        mutation_data_lineage_all.extend(
            self.get_mutationData_experimentIDAndLineageName_dataStage01ResequencingLineage(experiment_id,lineage_name))
        # get ALL mutation frequencies by experiment_id and lineage name
        rows_lineage.extend(
            self.get_row_experimentIDAndLineageName_dataStage01ResequencingLineage(experiment_id,lineage_name))

    # de-duplicate while keeping first-seen order so that the column order
    # handed to the clustering is reproducible between runs
    mutation_data_lineage = [x for x in dict.fromkeys(mutation_data_lineage_all)
                             if x not in mutation_id_exclusion_list]

    if cnt_sample_names == 0 or not mutation_data_lineage:
        # an empty matrix cannot be clustered
        print('no lineage mutation data to cluster for analysis_id ' + str(analysis_id_I))
        return

    # index the frequencies once instead of scanning every row for every cell;
    # later rows overwrite earlier ones, as in a sequential scan without break
    frequencies = {}
    for row in rows_lineage:
        frequencies[(row['lineage_name'], row['intermediate'], row['mutation_id'])] = row['mutation_frequency']

    # generate the frequency matrix data structure (lineage intermediate x mutation)
    data_O = numpy.zeros((cnt_sample_names,len(mutation_data_lineage)))
    lineages = []   # one label per matrix row
    sample_cnt = 0
    for lineage_name_cnt, lineage_name in enumerate(lineage_names):
        intermediates = intermediates_lineage[lineage_name_cnt]
        if not intermediates:
            continue
        first_intermediate = min(intermediates)
        last_intermediate = max(intermediates)
        for intermediate in intermediates:
            # label by the intermediate's value, not by its list position:
            # the position only equals the value when the intermediates are
            # exactly 0..n-1 in ascending order
            if intermediate == first_intermediate:
                label = "start"
            elif intermediate == last_intermediate:
                label = "end"
            else:
                label = str(intermediate)
            lineages.append(lineage_name+": "+label)
            for mutation_cnt, mutation in enumerate(mutation_data_lineage):
                key = (lineage_name, intermediate, mutation)
                if key in frequencies:
                    data_O[sample_cnt,mutation_cnt] = frequencies[key]
            sample_cnt += 1

    # generate the clustering for the heatmap
    heatmap_O,dendrogram_col_O,dendrogram_row_O = calculateheatmap.heatmap(data_O,lineages,mutation_data_lineage,
            row_pdist_metric_I=row_pdist_metric_I,row_linkage_method_I=row_linkage_method_I,
            col_pdist_metric_I=col_pdist_metric_I,col_linkage_method_I=col_linkage_method_I)

    # add data to the database for the heatmap
    for d in heatmap_O:
        row = data_stage01_resequencing_heatmap(
            analysis_id_I,
            d['col_index'],
            d['row_index'],
            d['value'],
            d['col_leaves'],
            d['row_leaves'],
            d['col_label'],
            d['row_label'],
            d['col_pdist_metric'],
            d['row_pdist_metric'],
            d['col_linkage_method'],
            d['row_linkage_method'],
            'frequency',True, None)
        self.session.add(row)

    # add data to the database for the dendrograms (column first, then row)
    # NOTE(review): 'pdist_metric' used to be passed twice; the second slot is
    # presumed to be the linkage method (mirroring the metric/linkage pairing
    # of the heatmap row above) -- confirm against the
    # data_stage01_resequencing_dendrogram constructor.
    for dendrogram, linkage_method in ((dendrogram_col_O, col_linkage_method_I),
                                       (dendrogram_row_O, row_linkage_method_I)):
        row = data_stage01_resequencing_dendrogram(
            analysis_id_I,
            dendrogram['leaves'],
            dendrogram['icoord'],
            dendrogram['dcoord'],
            dendrogram['ivl'],
            dendrogram['colors'],
            dendrogram['pdist_metric'],
            dendrogram.get('linkage_method', linkage_method),
            'frequency',True, None)
        self.session.add(row)
    self.session.commit()
def execute_heatmap(self, analysis_id_I,gene_exclusion_list=[],
            row_pdist_metric_I='euclidean',row_linkage_method_I='complete',
            col_pdist_metric_I='euclidean',col_linkage_method_I='complete',
            order_sampleNameByGeneNameShort_I=False,
            sample_names_I=[],
            gene_name_shorts_I=[],):
    '''Execute hierarchical cluster on row and column data
    built from the FPKM values of every sample in an analysis

    INPUT:
    analysis_id_I = string, analysis id
    gene_exclusion_list = list of gene_short_names to leave out of the heatmap
    row_pdist_metric_I, row_linkage_method_I,
    col_pdist_metric_I, col_linkage_method_I = forwarded unchanged to
        calculate_heatmap.make_heatmap
    order_sampleNameByGeneNameShort_I = if True, rows will represent the sample_names and columns will represent the gene_short_names
                       if False, rows will represent the gene_short_names and columns will represent the sample_names
    sample_names_I = list of sample_names, forwarded to make_heatmap as both
        the filter and the ordering template
    gene_name_shorts_I = list of gene_short_names, forwarded to make_heatmap
        as both the filter and the ordering template

    OUTPUT:
    None; the heatmap rows are written with self.add_rows_table to
    'data_stage01_rnasequencing_heatmap'
    '''

    print('executing heatmap...')
    calculateheatmap = calculate_heatmap()
    # get the analysis information
    experiment_ids,sample_names = self.get_experimentIDAndSampleName_analysisID_dataStage01RNASequencingAnalysis(analysis_id_I)
    # query fpkm data:
    fpkms_all = []
    for sample_name_cnt, sample_name in enumerate(sample_names):
        fpkms_all.extend(
            self.get_rows_experimentIDAndSampleName_dataStage01RNASequencingGenesFpkmTracking(experiment_ids[sample_name_cnt],sample_name))

    # apply the gene exclusion list (previously accepted but never used)
    if gene_exclusion_list:
        excluded_genes = set(gene_exclusion_list)
        fpkms_all = [fpkm for fpkm in fpkms_all
                     if fpkm['gene_short_name'] not in excluded_genes]

    if order_sampleNameByGeneNameShort_I:
        row_label, col_label = 'sample_name', 'gene_short_name'
        row_template, col_template = sample_names_I, gene_name_shorts_I
    else:
        row_label, col_label = 'gene_short_name', 'sample_name'
        row_template, col_template = gene_name_shorts_I, sample_names_I
    heatmap_1,dendrogram_col_1,dendrogram_row_1 = calculateheatmap.make_heatmap(
        fpkms_all, row_label, col_label, 'FPKM',
        row_pdist_metric_I=row_pdist_metric_I,row_linkage_method_I=row_linkage_method_I,
        col_pdist_metric_I=col_pdist_metric_I,col_linkage_method_I=col_linkage_method_I,
        filter_rows_I=row_template,
        filter_columns_I=col_template,
        order_rowsFromTemplate_I=row_template,
        order_columnsFromTemplate_I=col_template)

    # add data to the database for the heatmap
    heatmap_O = []
    for d in heatmap_1:
        d['analysis_id'] = analysis_id_I
        d['value_units'] = 'FPKM'
        d['used_'] = True
        d['comment_'] = None
        heatmap_O.append(d)

    # annotate the dendrograms in the same way as the heatmap rows
    # NOTE: the dendrograms are prepared but not written anywhere; no
    # add_rows_table call for them is active in this method.
    dendrogram_col_O = []
    dendrogram_row_O = []
    for dendrogram, dendrogram_O in ((dendrogram_col_1, dendrogram_col_O),
                                     (dendrogram_row_1, dendrogram_row_O)):
        dendrogram['analysis_id'] = analysis_id_I
        dendrogram['value_units'] = 'FPKM'
        dendrogram['used_'] = True
        dendrogram['comment_'] = None
        dendrogram_O.append(dendrogram)

    self.add_rows_table('data_stage01_rnasequencing_heatmap',heatmap_O)
def execute_heatmap_mutationsAnnotated(self, analysis_id_I,mutation_id_exclusion_list=[],frequency_threshold=0.1,max_position=4630000,
            row_pdist_metric_I='euclidean',row_linkage_method_I='complete',
            col_pdist_metric_I='euclidean',col_linkage_method_I='complete',
            order_sampleNameByMutationID_I=False,
            sample_names_I=[],
            mutationIDs_I=[],
            ):
    '''Execute hierarchical cluster on row and column data
    built from the annotated mutation frequencies of every sample in an analysis

    INPUT:
    analysis_id_I = string, analysis id
    mutation_id_exclusion_list = list of mutation ids to leave out of the heatmap
    frequency_threshold = mutations with a mutation_frequency below this value are skipped
    max_position = mutations with a mutation_position above this value are skipped
    row_pdist_metric_I, row_linkage_method_I,
    col_pdist_metric_I, col_linkage_method_I = forwarded unchanged to
        calculate_heatmap.make_heatmap
    order_sampleNameByMutationID_I = if True, rows will represent the sample_names and columns will represent the mutation ids
                       if False, rows will represent the mutation ids and columns will represent the sample_names
    sample_names_I = list of sample_names, forwarded to make_heatmap as both
        the filter and the ordering template
    mutationIDs_I = list of mutation ids, forwarded to make_heatmap as both
        the filter and the ordering template

    OUTPUT:
    None; one data_stage01_resequencing_heatmap row per heatmap cell and two
    data_stage01_resequencing_dendrogram rows (column, then row) are added to
    self.session and committed
    '''

    calculateheatmap = calculate_heatmap()
    genomediff = genome_diff()

    print('executing heatmap...')
    # get the analysis information
    experiment_ids,sample_names = self.get_experimentIDAndSampleName_analysisID_dataStage01ResequencingAnalysis(analysis_id_I)
    mutation_data_O = []
    for sample_name_cnt, sample_name in enumerate(sample_names):
        # query mutation data:
        mutations = self.get_mutations_experimentIDAndSampleName_dataStage01ResequencingMutationsAnnotated(experiment_ids[sample_name_cnt],sample_name)
        for mutation in mutations:
            if mutation['mutation_position'] > max_position:
                continue
            if mutation['mutation_frequency'] < frequency_threshold:
                continue
            # mutation id
            mutation_id = genomediff._make_mutationID(mutation['mutation_genes'],mutation['mutation_type'],mutation['mutation_position'])
            if mutation_id in mutation_id_exclusion_list:
                continue
            # copy so that the queried row itself is left untouched
            tmp = dict(mutation)
            tmp['mutation_id'] = mutation_id
            mutation_data_O.append(tmp)

    # generate the clustering for the heatmap
    if order_sampleNameByMutationID_I:
        row_label, col_label = 'sample_name', 'mutation_id'
        row_template, col_template = sample_names_I, mutationIDs_I
    else:
        row_label, col_label = 'mutation_id', 'sample_name'
        row_template, col_template = mutationIDs_I, sample_names_I
    heatmap_O,dendrogram_col_O,dendrogram_row_O = calculateheatmap.make_heatmap(
        mutation_data_O, row_label, col_label, 'mutation_frequency',
        row_pdist_metric_I=row_pdist_metric_I,row_linkage_method_I=row_linkage_method_I,
        col_pdist_metric_I=col_pdist_metric_I,col_linkage_method_I=col_linkage_method_I,
        filter_rows_I=row_template,
        filter_columns_I=col_template,
        order_rowsFromTemplate_I=row_template,
        order_columnsFromTemplate_I=col_template)

    # add data to the database for the heatmap
    for d in heatmap_O:
        row = data_stage01_resequencing_heatmap(
            analysis_id_I,
            d['col_index'],
            d['row_index'],
            d['value'],
            d['col_leaves'],
            d['row_leaves'],
            d['col_label'],
            d['row_label'],
            d['col_pdist_metric'],
            d['row_pdist_metric'],
            d['col_linkage_method'],
            d['row_linkage_method'],
            'frequency',True, None)
        self.session.add(row)

    # add data to the database for the dendrograms (column first, then row)
    # NOTE(review): 'pdist_metric' used to be passed twice; the second slot is
    # presumed to be the linkage method (mirroring the metric/linkage pairing
    # of the heatmap row above) -- confirm against the
    # data_stage01_resequencing_dendrogram constructor.
    for dendrogram, linkage_method in ((dendrogram_col_O, col_linkage_method_I),
                                       (dendrogram_row_O, row_linkage_method_I)):
        row = data_stage01_resequencing_dendrogram(
            analysis_id_I,
            dendrogram['leaves'],
            dendrogram['icoord'],
            dendrogram['dcoord'],
            dendrogram['ivl'],
            dendrogram['colors'],
            dendrogram['pdist_metric'],
            dendrogram.get('linkage_method', linkage_method),
            'frequency',True, None)
        self.session.add(row)
    self.session.commit()