def execute_heatmap(self,
                analysis_id_I,simulation_ids_I=[],simulation_dateAndTimes_I=[],
                flux_units_I=[],rxn_ids_I=[],
                row_pdist_metric_I='euclidean',row_linkage_method_I='complete',
                col_pdist_metric_I='euclidean',col_linkage_method_I='complete',
                observable_only_I = False,
                order_rxnBySim_I = True,
                order_simulation_ids_I=False,
                order_rxn_ids_I=False,
                rxn_id_reverse_I=[]):
        '''Execute hierarchical clustering of fitted net fluxes and store the
        resulting heatmap and dendrograms.

        The work is done in three passes:
        1. collect the flux rows per flux unit and reaction for every simulation
        2. keep only the reactions that have exactly one row per simulation
        3. cluster the remaining data per flux unit and add the rows to
           'data_stage02_isotopomer_heatmap' and 'data_stage02_isotopomer_dendrogram'

        INPUT:
        analysis_id_I = string, analysis id
        simulation_ids_I = list of simulation_ids; only used when
                           simulation_dateAndTimes_I is also given, otherwise
                           both lists are queried from analysis_id_I
        simulation_dateAndTimes_I = list of simulation_dateAndTimes, parallel to simulation_ids_I
        flux_units_I = list of flux units (queried per simulation when empty)
        rxn_ids_I = list of rxn_ids (queried per simulation and flux unit when empty)
        row_pdist_metric_I, row_linkage_method_I,
        col_pdist_metric_I, col_linkage_method_I = passed through to make_heatmap
        observable_only_I = include only observable reactions
        order_rxnBySim_I = if True, rows will represent the fluxes and columns will represent the simulations
                           if False, rows will represent the simulations and columns will represent the fluxes
        order_simulation_ids_I = not referenced by this method
        order_rxn_ids_I = not referenced by this method
        rxn_id_reverse_I = list of rxn_ids to reverse the flux direction

        Assumptions:
        all simulation_ids must be unique (i.e., 1 simulation but 2 simulation_dateAndTimes will break the algorithm)
        all simulation_ids must have the same flux units and rxn_ids (i.e., flux
        units and rxn_ids that are not found in the first simulation are skipped)
        '''

        calculateheatmap = calculate_heatmap()
        ## Pass 1: get all the data
        data_O = {}                   # {flux_units: {rxn_id: [row, ...]}}
        rxn_ids_all = []
        unobservable_fu_rxn_ids = {}  # {flux_units: set(rxn_id)}
        # get the simulation_ids and the matching simulation_dateAndTimes
        if simulation_ids_I and simulation_dateAndTimes_I:
            simulation_ids = simulation_ids_I
            simulation_dateAndTimes = simulation_dateAndTimes_I
        else:
            simulation_ids = []
            simulation_dateAndTimes = []
            simulation_ids_unique = self.get_simulationID_analysisID_dataStage02IsotopomerAnalysis(analysis_id_I)
            for simulation_id in simulation_ids_unique:
                simulation_dateAndTimes_tmp = self.get_simulationDateAndTimes_simulationID_dataStage02IsotopomerfittedNetFluxes(simulation_id)
                # repeat the simulation_id so that both lists stay parallel
                simulation_dateAndTimes.extend(simulation_dateAndTimes_tmp)
                simulation_ids.extend([simulation_id for x in simulation_dateAndTimes_tmp])
        for simulation_cnt_1, simulation_id_1 in enumerate(simulation_ids):
            simulation_dateAndTime_1 = simulation_dateAndTimes[simulation_cnt_1]
            first_simulation = simulation_cnt_1==0
            # get the units
            if flux_units_I:
                flux_units = flux_units_I
            else:
                flux_units = self.get_fluxUnits_simulationIDAndSimulationDateAndTime_dataStage02IsotopomerfittedNetFluxes(simulation_id_1,simulation_dateAndTime_1)
            for fu in flux_units:
                if first_simulation:
                    # the first simulation defines the flux units that are tracked
                    data_O[fu] = {}
                    unobservable_fu_rxn_ids[fu] = set()
                elif fu not in data_O:
                    # flux unit unknown to the first simulation: it can never
                    # have a complete set of rows, so skip it (previously a KeyError)
                    continue
                # get the rxn_ids
                if rxn_ids_I:
                    rxn_ids = rxn_ids_I
                else:
                    rxn_ids = self.get_rxnIDs_simulationIDAndSimulationDateAndTimeAndFluxUnits_dataStage02IsotopomerfittedNetFluxes(simulation_id_1,simulation_dateAndTime_1,fu)
                for rxn_id in rxn_ids:
                    if first_simulation:
                        data_O[fu][rxn_id] = []
                    elif rxn_id not in data_O[fu]:
                        continue
                    rxn_ids_all.append(rxn_id)
                    # get the fluxes
                    row = self.get_row_simulationIDAndSimulationDateAndTimeAndFluxUnitsAndRxnID_dataStage02IsotopomerfittedNetFluxes(simulation_id_1,simulation_dateAndTime_1,fu,rxn_id)
                    if not row:
                        continue
                    # change the direction of specified fluxes
                    if row['rxn_id'] in rxn_id_reverse_I:
                        row['flux'] = -row['flux']
                        # negating an interval swaps its bounds: [lb,ub] -> [-ub,-lb]
                        row['flux_lb'],row['flux_ub'] = -row['flux_ub'],-row['flux_lb']
                    # NOTE(review): mfamethods is not defined in this method;
                    # presumably it is available at module level -- confirm
                    if observable_only_I and not mfamethods.check_observableNetFlux(row['flux'],row['flux_lb'],row['flux_ub']):
                        # remember the reaction so that Pass 2 drops it for this flux unit
                        unobservable_fu_rxn_ids[fu].add(rxn_id)
                        continue
                    data_O[fu][rxn_id].append(row)
        ## Pass 2: data integrity check
        rxn_ids_unique = sorted(set(rxn_ids_all))
        data_heatmap = {}
        for fu in data_O:
            data_heatmap[fu] = {}
            for rxn_id in rxn_ids_unique:
                if rxn_id in unobservable_fu_rxn_ids[fu]:
                    continue
                # rxn_ids_unique spans all flux units, so the rxn_id may be
                # missing for this particular flux unit
                rows_rxn = data_O[fu].get(rxn_id,[])
                data_tmp = []
                for simulation_cnt_1, simulation_id_1 in enumerate(simulation_ids):
                    for d in rows_rxn:
                        if d['simulation_id'] == simulation_id_1 and d['simulation_dateAndTime'] == simulation_dateAndTimes[simulation_cnt_1]:
                            data_tmp.append(d)
                            break
                # keep the reaction only if every simulation contributed a row
                if len(data_tmp) == len(simulation_ids):
                    data_heatmap[fu][rxn_id] = data_tmp
        ## Pass 3: generate the heatmap for each flux_unit
        heatmap_O = []
        dendrogram_col_O = []
        dendrogram_row_O = []
        for fu in data_heatmap:
            # flatten the data {rxn_id:[{},...],...} -> [{},...]
            data2 = []
            for rows_rxn in data_heatmap[fu].values():
                data2.extend(rows_rxn)
            # generate the clustering for the heatmap
            if order_rxnBySim_I:
                heatmap_1,dendrogram_col_1,dendrogram_row_1 = calculateheatmap.make_heatmap(data2,
                    'rxn_id','simulation_id','flux',
                    row_pdist_metric_I=row_pdist_metric_I,row_linkage_method_I=row_linkage_method_I,
                    col_pdist_metric_I=col_pdist_metric_I,col_linkage_method_I=col_linkage_method_I,
                    filter_rows_I=rxn_ids_I,
                    filter_columns_I=simulation_ids_I,
                    order_rowsFromTemplate_I=rxn_ids_I,
                    order_columnsFromTemplate_I=simulation_ids_I,)
            else:
                heatmap_1,dendrogram_col_1,dendrogram_row_1 = calculateheatmap.make_heatmap(data2,
                    'simulation_id','rxn_id','flux',
                    row_pdist_metric_I=row_pdist_metric_I,row_linkage_method_I=row_linkage_method_I,
                    col_pdist_metric_I=col_pdist_metric_I,col_linkage_method_I=col_linkage_method_I,
                    filter_rows_I=simulation_ids_I,
                    filter_columns_I=rxn_ids_I,
                    order_rowsFromTemplate_I=simulation_ids_I,
                    order_columnsFromTemplate_I=rxn_ids_I,)
            # add data to the database for the heatmap
            for d in heatmap_1:
                d['analysis_id'] = analysis_id_I
                d['value_units'] = fu
                d['used_'] = True
                d['comment_'] = None
                heatmap_O.append(d)
            # add data to the database for the dendrograms
            dendrogram_col_1['analysis_id'] = analysis_id_I
            dendrogram_col_1['value_units'] = fu
            dendrogram_col_1['used_'] = True
            dendrogram_col_1['comment_'] = None
            dendrogram_col_O.append(dendrogram_col_1)
            dendrogram_row_1['analysis_id'] = analysis_id_I
            dendrogram_row_1['value_units'] = fu
            dendrogram_row_1['used_'] = True
            dendrogram_row_1['comment_'] = None
            dendrogram_row_O.append(dendrogram_row_1)
        self.add_rows_table('data_stage02_isotopomer_heatmap',heatmap_O)
        self.add_rows_table('data_stage02_isotopomer_dendrogram',dendrogram_col_O)
        self.add_rows_table('data_stage02_isotopomer_dendrogram',dendrogram_row_O)
 def execute_heatmap_lineage(self, analysis_id_I,
             row_pdist_metric_I='euclidean',row_linkage_method_I='complete',
             col_pdist_metric_I='euclidean',col_linkage_method_I='complete',
                                              mutation_id_exclusion_list = []):
     '''Execute hierarchical clustering of lineage mutation frequencies and
     store the resulting heatmap and dendrograms.

     A frequency matrix is built with one row per (lineage, intermediate)
     sample and one column per mutation; cells without a matching record
     stay 0. The matrix is passed to calculate_heatmap.heatmap and the
     returned heatmap and dendrogram records are added to the session and
     committed.

     INPUT:
     analysis_id_I = string, analysis id
     row_pdist_metric_I, row_linkage_method_I,
     col_pdist_metric_I, col_linkage_method_I = passed through to heatmap
     mutation_id_exclusion_list = list of mutations to leave out of the matrix
     '''

     calculateheatmap = calculate_heatmap()
     print('executing heatmap...')
     # get the analysis information
     experiment_ids,lineage_names = self.get_experimentIDAndLineageName_analysisID_dataStage01ResequencingAnalysis(analysis_id_I)
     # collect the intermediates, mutations and frequency rows of every lineage
     intermediates_lineage = []
     mutation_data_lineage_all = []
     rows_lineage = []
     cnt_sample_names = 0
     for lineage_name_cnt,lineage_name in enumerate(lineage_names):
         experiment_id = experiment_ids[lineage_name_cnt]
         # get ALL intermediates by experiment_id and lineage name
         intermediates = self.get_intermediates_experimentIDAndLineageName_dataStage01ResequencingLineage(experiment_id,lineage_name)
         intermediates_lineage.append(intermediates)
         cnt_sample_names += len(intermediates)
         # get ALL mutation data by experiment_id and lineage name
         mutation_data_lineage_all.extend(self.get_mutationData_experimentIDAndLineageName_dataStage01ResequencingLineage(experiment_id,lineage_name))
         # get ALL mutation frequencies by experiment_id and lineage name
         rows_lineage.extend(self.get_row_experimentIDAndLineageName_dataStage01ResequencingLineage(experiment_id,lineage_name))
     mutation_data_lineage = [x for x in set(mutation_data_lineage_all) if x not in mutation_id_exclusion_list]
     # index the frequencies once instead of rescanning all rows for every
     # cell; a later row overwrites an earlier one, as in a full scan
     frequencies = {}
     for row in rows_lineage:
         frequencies[(row['lineage_name'],row['intermediate'],row['mutation_id'])] = row['mutation_frequency']
     # generate the frequency matrix data structure (sample x mutation)
     data_O = numpy.zeros((cnt_sample_names,len(mutation_data_lineage)))
     lineages = []
     sample_cnt = 0
     for lineage_name_cnt,lineage_name in enumerate(lineage_names):
         intermediates = intermediates_lineage[lineage_name_cnt]
         if not intermediates:
             continue
         # label the samples by intermediate VALUE (comparing the loop index
         # only worked for intermediates numbered 0..n-1)
         first_intermediate = min(intermediates)
         last_intermediate = max(intermediates)
         for intermediate in intermediates:
             if intermediate == first_intermediate:
                 label = "start"
             elif intermediate == last_intermediate:
                 label = "end"
             else:
                 label = str(intermediate)
             lineages.append(lineage_name+": "+label)
             for mutation_cnt,mutation in enumerate(mutation_data_lineage):
                 key = (lineage_name,intermediate,mutation)
                 if key in frequencies:
                     data_O[sample_cnt,mutation_cnt] = frequencies[key]
             sample_cnt += 1
     # generate the clustering for the heatmap
     heatmap_O,dendrogram_col_O,dendrogram_row_O = calculateheatmap.heatmap(data_O,lineages,mutation_data_lineage,
             row_pdist_metric_I=row_pdist_metric_I,row_linkage_method_I=row_linkage_method_I,
             col_pdist_metric_I=col_pdist_metric_I,col_linkage_method_I=col_linkage_method_I)
     # add data to the database for the heatmap
     for d in heatmap_O:
         row = data_stage01_resequencing_heatmap(
             analysis_id_I,
             d['col_index'],
             d['row_index'],
             d['value'],
             d['col_leaves'],
             d['row_leaves'],
             d['col_label'],
             d['row_label'],
             d['col_pdist_metric'],
             d['row_pdist_metric'],
             d['col_linkage_method'],
             d['row_linkage_method'],
             'frequency',True, None)
         self.session.add(row)
     # add data to the database for the dendrograms (column first, then row)
     # NOTE(review): 'pdist_metric' used to be passed twice; the second slot
     # is presumably the linkage method (mirrors the heatmap record) --
     # confirm against data_stage01_resequencing_dendrogram
     for dendrogram,linkage_method in ((dendrogram_col_O,col_linkage_method_I),
                                       (dendrogram_row_O,row_linkage_method_I)):
         row = data_stage01_resequencing_dendrogram(
             analysis_id_I,
             dendrogram['leaves'],
             dendrogram['icoord'],
             dendrogram['dcoord'],
             dendrogram['ivl'],
             dendrogram['colors'],
             dendrogram['pdist_metric'],
             linkage_method,
             'frequency',True, None)
         self.session.add(row)
     self.session.commit()
    def execute_heatmap(self, analysis_id_I,gene_exclusion_list=[],
                row_pdist_metric_I='euclidean',row_linkage_method_I='complete',
                col_pdist_metric_I='euclidean',col_linkage_method_I='complete',
                order_sampleNameByGeneNameShort_I=False,
                sample_names_I=[],
                gene_name_shorts_I=[],):
        '''Execute hierarchical clustering of FPKM values and store the heatmap

        The FPKM rows of every sample of the analysis are pooled, clustered
        with calculate_heatmap.make_heatmap, and the heatmap records are added
        to 'data_stage01_rnasequencing_heatmap'. The dendrograms are annotated
        but not written to any table.

        INPUT:
        analysis_id_I = string, analysis id
        gene_exclusion_list = not referenced by this method
        row_pdist_metric_I, row_linkage_method_I,
        col_pdist_metric_I, col_linkage_method_I = passed through to make_heatmap
        order_sampleNameByGeneNameShort_I = if True, rows are keyed by 'sample_name' and columns by 'gene_short_name'
                                            if False, rows are keyed by 'gene_short_name' and columns by 'sample_name'
        sample_names_I = filter/order template for the sample names
        gene_name_shorts_I = filter/order template for the gene short names
        '''

        print('executing heatmap...')
        clusterer = calculate_heatmap()
        # get the analysis information
        experiment_ids,sample_names = self.get_experimentIDAndSampleName_analysisID_dataStage01RNASequencingAnalysis(analysis_id_I)
        # pool the fpkm data of all samples
        fpkms_all = []
        for idx,sample_name in enumerate(sample_names):
            fpkms_all.extend(
                self.get_rows_experimentIDAndSampleName_dataStage01RNASequencingGenesFpkmTracking(experiment_ids[idx],sample_name))
        # pick the row/column orientation
        if order_sampleNameByGeneNameShort_I:
            row_key,col_key = 'sample_name','gene_short_name'
            row_template,col_template = sample_names_I,gene_name_shorts_I
        else:
            row_key,col_key = 'gene_short_name','sample_name'
            row_template,col_template = gene_name_shorts_I,sample_names_I
        heatmap_1,dendrogram_col_1,dendrogram_row_1 = clusterer.make_heatmap(fpkms_all,
            row_key,col_key,'FPKM',
            row_pdist_metric_I=row_pdist_metric_I,row_linkage_method_I=row_linkage_method_I,
            col_pdist_metric_I=col_pdist_metric_I,col_linkage_method_I=col_linkage_method_I,
            filter_rows_I=row_template,
            filter_columns_I=col_template,
            order_rowsFromTemplate_I=row_template,
            order_columnsFromTemplate_I=col_template)
        # bookkeeping fields shared by every record
        annotation = (('analysis_id',analysis_id_I),
                      ('value_units','FPKM'),
                      ('used_',True),
                      ('comment_',None))
        # add data to the database for the heatmap
        heatmap_O = []
        for record in heatmap_1:
            for field,value in annotation:
                record[field] = value
            heatmap_O.append(record)
        # annotate the dendrograms (column first, then row); they are
        # collected here but not written to the database
        for dendrogram in (dendrogram_col_1,dendrogram_row_1):
            for field,value in annotation:
                dendrogram[field] = value
        dendrogram_col_O = [dendrogram_col_1]
        dendrogram_row_O = [dendrogram_row_1]
        self.add_rows_table('data_stage01_rnasequencing_heatmap',heatmap_O)
    def execute_heatmap_mutationsAnnotated(self, analysis_id_I,mutation_id_exclusion_list=[],frequency_threshold=0.1,max_position=4630000,
                row_pdist_metric_I='euclidean',row_linkage_method_I='complete',
                col_pdist_metric_I='euclidean',col_linkage_method_I='complete',
                order_sampleNameByMutationID_I=False,
                sample_names_I=[],
                mutationIDs_I=[],
                ):
        '''Execute hierarchical clustering of annotated mutation frequencies
        and store the resulting heatmap and dendrograms.

        The annotated mutations of every sample of the analysis are filtered,
        tagged with a mutation id, clustered with
        calculate_heatmap.make_heatmap, and the heatmap and dendrogram records
        are added to the session and committed.

        INPUT:
        analysis_id_I = string, analysis id
        mutation_id_exclusion_list = list of mutation ids to skip
        frequency_threshold = mutations with a lower 'mutation_frequency' are skipped
        max_position = mutations with a greater 'mutation_position' are skipped
        row_pdist_metric_I, row_linkage_method_I,
        col_pdist_metric_I, col_linkage_method_I = passed through to make_heatmap
        order_sampleNameByMutationID_I = if True, rows are keyed by 'sample_name' and columns by 'mutation_id'
                                         if False, rows are keyed by 'mutation_id' and columns by 'sample_name'
        sample_names_I = filter/order template for the sample names
        mutationIDs_I = filter/order template for the mutation ids
        '''
        calculateheatmap = calculate_heatmap()
        genomediff = genome_diff()

        print('executing heatmap...')
        # get the analysis information
        experiment_ids,sample_names = self.get_experimentIDAndSampleName_analysisID_dataStage01ResequencingAnalysis(analysis_id_I)
        mutation_data_O = []
        for sample_name_cnt,sample_name in enumerate(sample_names):
            # query mutation data:
            mutations = self.get_mutations_experimentIDAndSampleName_dataStage01ResequencingMutationsAnnotated(experiment_ids[sample_name_cnt],sample_name)
            for mutation in mutations:
                if mutation['mutation_position'] > max_position:
                    continue
                if mutation['mutation_frequency']<frequency_threshold:
                    continue
                mutation_id = genomediff._make_mutationID(mutation['mutation_genes'],mutation['mutation_type'],mutation['mutation_position'])
                if mutation_id in mutation_id_exclusion_list:
                    continue
                # copy the record so that the queried data is left untouched
                tmp = {}
                tmp.update(mutation)
                tmp['mutation_id'] = mutation_id
                mutation_data_O.append(tmp)
        # pick the row/column orientation
        if order_sampleNameByMutationID_I:
            row_key,col_key = 'sample_name','mutation_id'
            row_template,col_template = sample_names_I,mutationIDs_I
        else:
            row_key,col_key = 'mutation_id','sample_name'
            row_template,col_template = mutationIDs_I,sample_names_I
        # generate the clustering for the heatmap
        heatmap_O,dendrogram_col_O,dendrogram_row_O = calculateheatmap.make_heatmap(mutation_data_O,
            row_key,col_key,'mutation_frequency',
            row_pdist_metric_I=row_pdist_metric_I,row_linkage_method_I=row_linkage_method_I,
            col_pdist_metric_I=col_pdist_metric_I,col_linkage_method_I=col_linkage_method_I,
            filter_rows_I=row_template,
            filter_columns_I=col_template,
            order_rowsFromTemplate_I=row_template,
            order_columnsFromTemplate_I=col_template)
        # add data to the database for the heatmap
        for d in heatmap_O:
            row = data_stage01_resequencing_heatmap(
                analysis_id_I,
                d['col_index'],
                d['row_index'],
                d['value'],
                d['col_leaves'],
                d['row_leaves'],
                d['col_label'],
                d['row_label'],
                d['col_pdist_metric'],
                d['row_pdist_metric'],
                d['col_linkage_method'],
                d['row_linkage_method'],
                'frequency',True, None)
            self.session.add(row)
        # add data to the database for the dendrograms (column first, then row)
        # NOTE(review): 'pdist_metric' used to be passed twice; the second slot
        # is presumably the linkage method (mirrors the heatmap record) --
        # confirm against data_stage01_resequencing_dendrogram
        for dendrogram,linkage_method in ((dendrogram_col_O,col_linkage_method_I),
                                          (dendrogram_row_O,row_linkage_method_I)):
            row = data_stage01_resequencing_dendrogram(
                analysis_id_I,
                dendrogram['leaves'],
                dendrogram['icoord'],
                dendrogram['dcoord'],
                dendrogram['ivl'],
                dendrogram['colors'],
                dendrogram['pdist_metric'],
                linkage_method,
                'frequency',True, None)
            self.session.add(row)
        self.session.commit()