def save_tables_show_graph(self, desk):
        """Summarise the HMI curve of every species; optionally plot and save.

        One pass is made per bias-correction setting: only the current
        ``desk.withCorrection`` when ``desk.each_all == 'each'``, otherwise
        first without and then with correction.  In each pass every entry of
        ``self.list_species_name_hmi`` (a 5-item sequence: file name, name,
        species, hmi object, picture suffix) is reduced to mean, median,
        standard deviation, maximum and minimum.  These are stored on ``self``
        as ``meanY``, ``medianY``, ``stdY``, ``maxY`` and ``minY`` (values of
        the last species processed) and collected for the summary table.

        When ``desk.showGraph`` or ``desk.saveGraph`` is set the curve is
        drawn through ``graphPac``.  When ``desk.saveData`` is set each pass
        ends with ``hmi.ent.calc_anova`` and ``hmi.ent.print_data_summary``,
        called on the hmi object of the last species.

        Parameters
        ----------
        desk
            Settings object.  ``desk.withCorrection`` is overwritten with the
            bias-correction setting of the pass being processed.

        Returns
        -------
        bool
            Always ``True``.  ``self.failure`` is set to ``False`` and
            ``self.error_msg`` to a completion message.
        """
        if desk.each_all == 'each':
            bias_corr_list = [desk.withCorrection]
        else:
            bias_corr_list = [False, True]

        # These depend only on the settings, so they are resolved once
        # instead of once per species.
        sFrame = '' if desk.frame == 0 else ', frame=' + str(desk.frame)
        rand_label = '' if desk.label_random == '' else ' ' + desk.label_random
        # Values in mnat are 1000 times larger, so fewer decimals are kept.
        roundVal = 1 if desk.mnat else 4
        single_curve = desk.frame < 2

        print("-----------------------------------------------------------")
        for bias_corr in bias_corr_list:
            # Bound before the species loop so the summary step below can
            # rely on them even when the species list is empty.
            if bias_corr:
                str_correction = ' (bias corr.)'
                filename_correction = '_bias_corr'
            else:
                str_correction = ''
                filename_correction = ''

            speciesParams = []
            listMI_Anova = []
            hmi = None

            for iLoop, mat in enumerate(self.list_species_name_hmi, start=1):
                _, name, species, hmi, sufix = mat

                params = desk.dicParams[species]
                if desk.minmax == 'mincut':
                    numOfSeqs, L = params[2], params[5]
                else:
                    numOfSeqs, L = params[3], params[6]

                print("%i) %s - %s, %s" %
                      (iLoop, name, desk.minmax,
                       "bias corr." if bias_corr else 'no corr.'))

                if desk.organism == 'Drosophila':
                    sp = self.dr.mnemonic(species.replace('Drosophila ', ''))
                else:
                    sp = species

                desk.withCorrection = bias_corr

                title = ('HMI: %s %s, %s %s\n%s%s%s; %i seqs; len=%i%s; #letter=%i'
                         % (desk.organism, species, desk.seqType,
                            desk.gene_title, desk.minmax, str_correction,
                            rand_label, numOfSeqs, L, sFrame,
                            desk.numOfLetters))

                # HMI mean and standard error for each k.  dtype=float keeps
                # the in-place division below valid for integer input.
                if bias_corr:
                    arrayMIthis = np.array(hmi.arrayMIcorr, dtype=float)
                    arraySE = np.array(hmi.arraySEcorr, dtype=float)
                else:
                    arrayMIthis = np.array(hmi.arrayMI, dtype=float)
                    arraySE = np.array(hmi.arraySE, dtype=float)

                if desk.mnat:
                    arrayMIthis *= 1000
                    arraySE *= 1000

                if desk.norm:
                    arrayMIthis /= desk.numOfLetters
                    arraySE /= desk.numOfLetters

                n_points = len(arrayMIthis)
                if single_curve:
                    # One curve: abscissa is k = 3, 4, 5, ...
                    xSeq = list(range(3, n_points + 3))
                    arrayVal = arrayMIthis
                    ySeqSEFinal = arraySE
                else:
                    # Framed curves: abscissa is k = 3, 6, 9, ... and every
                    # value (and its SE) is repeated three times.
                    xSeq = list(range(3, 3 * n_points + 1, 3))
                    arrayVal = [arrayMIthis[m]
                                for m in range(n_points) for _ in range(3)]
                    ySeqSEFinal = [arraySE[m]
                                   for m in range(n_points) for _ in range(3)]

                self.meanY = np.round(np.mean(arrayVal), roundVal)
                self.medianY = np.round(np.median(arrayVal), roundVal)
                self.stdY = np.round(np.std(arrayVal), roundVal)
                self.maxY = np.round(np.max(arrayVal), roundVal)
                self.minY = np.round(np.min(arrayVal), roundVal)

                speciesParams.append([
                    species, sp, numOfSeqs, self.meanY, self.medianY,
                    self.stdY, self.maxY, self.minY
                ])
                listMI_Anova.append(arrayVal)

                if desk.showGraph or desk.saveGraph:
                    if single_curve:
                        gHist = graphPac.Histogram_FreqDistribution(
                            desk, title)

                        gHist.meanY = self.meanY
                        gHist.medianY = self.medianY
                        gHist.stdY = self.stdY
                        gHist.maxY = self.maxY
                        gHist.minY = self.minY

                        gHist.plot_H_MI('V',
                                        xSeq=xSeq,
                                        ySeq=arrayVal,
                                        ySE=arraySE,
                                        showError=True)

                        gHist.plot_H_MI('H',
                                        xSeq=xSeq,
                                        ySeq=arrayVal,
                                        ySE=ySeqSEFinal,
                                        showError=True)

                        gHist.densityBar("H", seq=arrayVal)
                    else:
                        # NOTE(review): gHist is only created in the branch
                        # above, so with frame >= 2 this name is unbound in
                        # this call. Presumably the histogram of an earlier
                        # frame was meant to be reused - confirm and pass it
                        # in explicitly.
                        gHist.sameBar(
                            xSeq=xSeq,
                            arrayMIthis=arrayVal,
                            linestyleCode=self.arrLinestyleCode[desk.frame],
                            color=self.arrColor[desk.frame])

                    if desk.frame in (0, 3):
                        pictureName = 'HMI_%s%s' % (sufix, filename_correction)
                        gHist.myPlot.print_graph(desk,
                                                 gHist.fig,
                                                 pictureName,
                                                 frame=desk.tk_root)

            if desk.saveData:
                if hmi is None:
                    # No species was processed, so there is neither data nor
                    # an hmi object to run the ANOVA and summary on.
                    print("No species to summarise; summary not written.")
                    continue

                sufix = ('%s_%s_%s_%s_frame%i_NOL%i_%iL_cutoff%i') %\
                        (desk.organism, desk.minmax, desk.seqType, desk.gene_title, desk.frame, desk.numOfLetters, desk.cutoffLength, desk.cutoffNumSeq)

                # desk.stri_random is only read when a random label is set.
                rand_suffix = "" if (desk.label_random
                                     == "") else desk.stri_random

                filename = 'HMI_summary_%s%s%s.txt' % (
                    sufix, filename_correction, rand_suffix)
                stri = hmi.ent.calc_anova(listMI_Anova, sufix)

                hmi.ent.print_data_summary(desk,
                                           speciesParams,
                                           roundVal=roundVal,
                                           filename=filename,
                                           stri=stri)

        self.failure = False
        self.error_msg = 'Task ended. All right.'
        return True
Beispiel #2
0
    def save_tables_show_graph(self, cmp_file=""):
        """Summarise the HMI curve of every species; optionally plot and save.

        Every entry of ``self.list_species_name_hmi`` (a 5-item sequence:
        file name, name, species, hmi object, picture suffix) is reduced to
        mean, median, standard deviation, maximum and minimum, using the
        bias-corrected arrays when ``self.withCorrection`` is set.  The
        statistics are stored on ``self`` as ``meanY``, ``medianY``, ``stdY``,
        ``maxY`` and ``minY`` (values of the last species processed), and
        ``self.unit`` is set to ``'mnat'`` or ``'nat'``.

        When ``self.showGraph`` or ``self.saveGraph`` is set the curve is
        drawn through ``graphPac``.  When ``self.saveData`` is set,
        ``hmi.ent.calc_anova`` and ``hmi.ent.print_data_summary`` are called
        on the hmi object of the last species.

        ``self.list_species_name_hmi`` is deleted before returning, so it has
        to be rebuilt before this method can be called again.

        Parameters
        ----------
        cmp_file : str
            Text appended to the picture name and to the summary file name.

        Returns
        -------
        bool
            Always ``True``.  ``self.failure`` is set to ``False`` and
            ``self.error_msg`` to a completion message.
        """
        bias_corr_list = [self.withCorrection]

        # These depend only on the settings, so they are resolved once
        # instead of once per species.
        sFrame = '' if self.frame == 0 else ', frame=' + str(self.frame)
        # Values in mnat are 1000 times larger, so fewer decimals are kept.
        if self.mnat:
            roundVal, unit = 1, 'mnat'
        else:
            roundVal, unit = 4, 'nat'
        single_curve = self.frame < 2

        print("-----------------------------------------------------------")
        for bias_corr in bias_corr_list:
            # Bound before the species loop so the summary step below can
            # rely on them even when the species list is empty.
            if bias_corr:
                str_correction = ' (bias corr.)'
                filename_correction = '_bias_corr'
            else:
                str_correction = ''
                filename_correction = ''

            speciesParams = []
            listMI_Anova = []
            hmi = None

            for iLoop, mat in enumerate(self.list_species_name_hmi, start=1):
                _, name, species, hmi, sufix = mat

                numOfSeqs = self.sampleElems
                L = self.LH0

                print("%i) %s - %s, %s"%(iLoop, name, self.minmax, "bias corr." if bias_corr else 'no corr.' ))

                sp = self.which_sp(self.organism, species)

                self.withCorrection = bias_corr
                self.unit = unit

                title = ('HMI: %s %s, %s %s\n%s%s; %i seqs; len=%i%s; #letter=%i'
                         % (self.organism, species, self.seqType, self.gene,
                            self.minmax, str_correction, numOfSeqs, L,
                            sFrame, self.numOfLetters))

                # HMI mean and standard error for each k.  dtype=float keeps
                # the in-place division below valid for integer input.
                if bias_corr:
                    arrayMIthis = np.array(hmi.arrayMIcorr, dtype=float)
                    arraySE = np.array(hmi.arraySEcorr, dtype=float)
                else:
                    arrayMIthis = np.array(hmi.arrayMI, dtype=float)
                    arraySE = np.array(hmi.arraySE, dtype=float)

                if self.mnat:
                    arrayMIthis *= 1000
                    arraySE *= 1000

                if self.norm:
                    arrayMIthis /= self.numOfLetters
                    arraySE /= self.numOfLetters

                n_points = len(arrayMIthis)
                if single_curve:
                    # One curve: abscissa is k = 3, 4, 5, ...
                    xSeq = list(range(3, n_points + 3))
                    arrayVal = arrayMIthis
                    ySeqSEFinal = arraySE
                else:
                    # Framed curves: abscissa is k = 3, 6, 9, ... and every
                    # value (and its SE) is repeated three times.
                    xSeq = list(range(3, 3 * n_points + 1, 3))
                    arrayVal = [arrayMIthis[m]
                                for m in range(n_points) for _ in range(3)]
                    ySeqSEFinal = [arraySE[m]
                                   for m in range(n_points) for _ in range(3)]

                self.meanY = np.round(np.mean(arrayVal), roundVal)
                self.medianY = np.round(np.median(arrayVal), roundVal)
                self.stdY = np.round(np.std(arrayVal), roundVal)
                self.maxY = np.round(np.max(arrayVal), roundVal)
                self.minY = np.round(np.min(arrayVal), roundVal)

                speciesParams.append([species, sp, numOfSeqs, self.meanY,
                                      self.medianY, self.stdY, self.maxY,
                                      self.minY])
                listMI_Anova.append(arrayVal)

                if self.showGraph or self.saveGraph:
                    if single_curve:
                        gHist = graphPac.Histogram_FreqDistribution(self, title)

                        gHist.meanY = self.meanY
                        gHist.medianY = self.medianY
                        gHist.stdY = self.stdY
                        gHist.maxY = self.maxY
                        gHist.minY = self.minY
                        gHist.desk = self

                        gHist.plot_H_MI('V', xSeq=xSeq, ySeq=arrayVal,
                                        ySE=arraySE, showError=True,
                                        unit=self.unit, roundVal=roundVal)

                        gHist.plot_H_MI('H', xSeq=xSeq, ySeq=arrayVal,
                                        ySE=ySeqSEFinal, showError=True,
                                        unit=self.unit, roundVal=roundVal)

                        gHist.densityBar("H", seq=arrayVal, unit=self.unit,
                                         roundVal=roundVal)
                    else:
                        # NOTE(review): gHist is only created in the branch
                        # above, so with frame >= 2 this name is unbound in
                        # this call. Presumably the histogram of an earlier
                        # frame was meant to be reused - confirm.
                        gHist.sameBar(xSeq=xSeq, arrayMIthis=arrayVal,
                                      linestyleCode=self.arrLinestyleCode[self.frame],
                                      color=self.arrColor[self.frame])

                    if self.frame in (0, 3):
                        # cmp_file ends up in the picture name
                        pictureName = 'HMI_%s%s%s'%(sufix, filename_correction, cmp_file)
                        gHist.myPlot.print_graph(self, gHist.fig, pictureName,
                                                 frame=self.tk_root, stay=True)

            if self.saveData:
                if hmi is None:
                    # No species was processed, so there is neither data nor
                    # an hmi object to run the ANOVA and summary on.
                    print("No species to summarise; summary not written.")
                    continue

                sufix = ('%s_%s_%s_%s_frame%i_NOL%i_%iL_cutoff%i') %\
                        (self.organism, self.minmax, self.seqType, self.gene, self.frame, self.numOfLetters, self.cutoffLength, self.cutoffNumSeq)

                # cmp_file ends up in the summary file name
                filename = 'HMI_summary_%s%s%s.txt' % (sufix, filename_correction, cmp_file)
                stri = hmi.ent.calc_anova(listMI_Anova, sufix)

                hmi.ent.print_data_summary(self, speciesParams, roundVal=roundVal, filename=filename, stri=stri)

        self.failure = False
        self.error_msg = 'Task ended. All right.'

        # Drop the per-species results and reclaim their memory.
        del self.list_species_name_hmi
        gc.collect()

        return True
    def save_tables_show_graph(self, desk, which_module):
        """Summarise, plot and save per-species Entropy / VMI values.

        When ``desk.each_all == 'each'`` only ``which_module`` and the current
        ``desk.withCorrection`` are processed; otherwise both modules
        (``'Entropy'`` and ``'VMI'``) are processed, each without and with
        bias correction.

        For every entry of ``self.list_species_name_vmi`` (unpacked as file
        name, name, species, vmi object, picture suffix) the matching value
        and standard-error lists are read from ``vmi``, optionally divided by
        ``desk.numOfLetters`` (``desk.norm``) and multiplied by 1000
        (``desk.mnat``).  A species with no value greater than zero is
        reported and skipped.  Mean, median, standard deviation, maximum and
        minimum are stored on ``self`` (``meanY`` ... ``minY``).

        With ``desk.showGraph`` or ``desk.saveGraph`` set, the Entropy module
        draws a histogram through ``graphPac`` and the other module a heat map
        through ``vmi.plotHeatMap``.  With ``desk.saveData`` set, each pass
        ends with ``vmi.ent.calc_anova`` and ``vmi.ent.print_data_summary``.

        ``desk.withCorrection`` is overwritten with the bias-correction
        setting of the pass being processed.
        """

        if desk.each_all == 'each':
            list_modules = [which_module]
            bias_corr_list = [desk.withCorrection]
        else:
            list_modules = ['Entropy', 'VMI']
            bias_corr_list = [False, True]
                    
        print("-----------------------------------------------------------")
        for which_module in list_modules:
            for bias_corr in bias_corr_list:
                iLoop = 0
                listMI_Anova = []
                speciesParams = []
                
                for mat in self.list_species_name_vmi:
                    iLoop += 1
                    filename, name, species, vmi, sufix = mat
                    
                    # from here on `mat` is the parameter record of the species
                    mat = desk.dicParams[species]
        
                    # 'mincut' reads positions 2 and 5, anything else 3 and 6
                    if desk.minmax == 'mincut':
                        numOfSeqs = mat[2]
                        L = mat[5]
                    else:
                        numOfSeqs = mat[3]
                        L = mat[6]
       
                    print("%i) %s - %s %s %s"%(iLoop, name, desk.minmax, "bias corr." if bias_corr else 'no corr.', which_module ))
        
                    if desk.organism == 'Drosophila':
                        sp = self.dr.mnemonic(species.replace('Drosophila ',''))
                    else:
                        sp = species
                            
                    desk.withCorrection = bias_corr
                    
                    if desk.withCorrection:
                        str_correction = ' (bias corr.)'
                        filename_correction = '_bias_corr'
                    else:
                        str_correction = ''
                        filename_correction = ''        
    
                    std_rand = "" if (desk.label_random == "") else " " + desk.label_random
                    
                    if which_module == 'Entropy':
                        title = 'Entropy Distribution %s %s, %s %s\n%s%s%s; %i seqs; len=%i; #letter=%i'%\
                            (desk.organism, species, desk.seqType, desk.gene_title, desk.minmax, str_correction, std_rand, numOfSeqs, L,  desk.numOfLetters)
                    else:
                        title = 'VMI Heat Map %s %s, %s %s %s%s%s\n%i seqs; len=%i; #letter=%i'%\
                            (desk.organism, species, desk.seqType, desk.gene_title, desk.minmax, str_correction, std_rand, numOfSeqs, L,  desk.numOfLetters)
            
            
                    # pick the value / standard-error lists for this module
                    # and correction setting; np.array() works on a copy
                    if which_module == 'Entropy':
                        if not desk.withCorrection:           
                            arrayVal = np.array(vmi.HShannonList)
                            arraySE = np.array(vmi.SeHShannonList)
                        else:
                            arrayVal = np.array(vmi.HShannonCorrList)
                            arraySE = np.array(vmi.SeHShannonCorrList)
                    else:
                        if not desk.withCorrection:           
                            arrayVal = np.array(vmi.MIlist)
                            arraySE = np.array(vmi.SeMIList)
                        else:
                            arrayVal = np.array(vmi.MIcorrList)
                            arraySE = np.array(vmi.SeMICorrList)
        
                    ''' normalization dividing by numOfLetters '''
                    if desk.norm:
                        arrayVal /= desk.numOfLetters
                        arraySE /= desk.numOfLetters
        
                    ''' mili nats '''
                    if desk.mnat:
                        arrayVal *= 1000
                        arraySE *= 1000
                        
                        roundVal = 2
                    else:
                        roundVal = 4
        
                    # print '--- params -----------------'
                    # Find the largest value; maxiPos keeps its (i, j) pair
                    # from vmi.ijList together with the value and its SE.
                    maxMI = 0
                    maxiPos = None
                    
                    is_zero = True
                    
                    for pos in range(len(arrayVal)):
                        if arrayVal[pos] > maxMI:
                            maxMI = arrayVal[pos]
                            SE = arraySE[pos]
                            i,j = vmi.ijList[pos]
                            maxiPos = [i,j,maxMI,SE]
                            is_zero = False
        
                    # no value above zero: report the species and skip it
                    if is_zero:
                        stri = '### Species %s has MI = ZERO. Too conserved data sequences. Impossible to include in analysis.'%(species)
                        print(stri)
                        continue
                        
                    xSeq = [x for x in range(len(arrayVal))]
        
                    self.meanY = np.round(np.mean(arrayVal), roundVal)
                    self.medianY = np.round(np.median(arrayVal), roundVal)
                    self.stdY  = np.round(np.std(arrayVal), roundVal)
                    self.maxY = np.round(np.max(arrayVal), roundVal)
                    self.minY = np.round(np.min(arrayVal), roundVal)
        
                    speciesParams.append([species,sp,numOfSeqs, self.meanY, self.medianY, self.stdY, self.maxY, self.minY])
                    
                    # values are only kept for the ANOVA when data is saved
                    if desk.saveData:
                        listMI_Anova.append(arrayVal)
        
                    if desk.showGraph or desk.saveGraph:
                        if which_module == 'Entropy':
                            gHist = graphPac.Histogram_FreqDistribution(desk, title)
                            
                            gHist.meanY = self.meanY
                            gHist.medianY = self.medianY
                            gHist.stdY  = self.stdY
                            gHist.maxY = self.maxY
                            gHist.minY = self.minY
                                            
                            gHist.plot_H_MI('V', xSeq=xSeq, ySeq=arrayVal, ySE=arraySE, showError=True)
                            gHist.densityBar('V', seq=arrayVal)
                            
                            pictureName = 'VHShannon_%s%s%s'%(sufix, filename_correction, desk.stri_random)
                            gHist.myPlot.print_graph(desk, gHist.fig, pictureName, frame=desk.tk_root)
                        else:
                            ''' 3D dont has ceil '''
                            # limSup is the rounded maximum, raised to
                            # desk.heatmap_ceil_value only for a 2D map with
                            # desk.heatmap_ceil set and a maximum not above it
                            if desk.is3D:
                                limSup = self.maxY
                            else:
                                ceil = desk.heatmap_ceil_value
                                
                                ''' the same roof for all heatmaps: parametrize in future '''
                                if desk.heatmap_ceil:
                                    if self.maxY <= ceil:
                                        limSup = ceil
                                    else:
                                        limSup = self.maxY
                                else:
                                    limSup = self.maxY
                            
                            ''''  updated 28/09/2015 '''
                            gMI = vmi.plotHeatMap(desk, desk.is3D, arrayVal, vmi.ijList, L, maxiPos, title, species=species, limSup=limSup, roundVal=roundVal, str_correction=str_correction)
                 
                            if desk.is3D:
                                pictureName = 'HeatMap_3D_VMI_%s%s%s'%(sufix, filename_correction,desk.stri_random)
                            else:
                                gMI.densityHeatmapBar(arrayVal, limSup)
                                pictureName = 'HeatMap_2D_VMI_%s%s%s'%(sufix, filename_correction,desk.stri_random)

                            gMI.myPlot.print_graph(desk, gMI.fig, pictureName, frame=desk.tk_root)
        
                            '''
                            plt.cla()
                            plt.clf()
                            plt.close()
                            del gMI
                            gc.collect()
                            '''

                # Flavio 02/06/2015
                # prefix of the summary file name for this module
                if which_module == 'Entropy':
                    which_symb = 'VHShannon'
                else:
                    which_symb = 'VMI'
                    
                    
                # NOTE(review): vmi, filename_correction and roundVal are
                # whatever the last loop iteration left behind; they are
                # unbound if self.list_species_name_vmi is empty - confirm
                # callers never pass an empty list together with saveData.
                if desk.saveData:
                    sufix = ('%s_%s_%s_%s_NOL%i_%iL_cutoff%i') %\
                            (desk.organism, desk.minmax, desk.seqType, desk.gene_title, desk.numOfLetters, desk.cutoffLength, desk.cutoffNumSeq)
            
                    filename = '%s_summary_%s%s%s.txt' % (which_symb, sufix, filename_correction, desk.stri_random)
                    stri = vmi.ent.calc_anova(listMI_Anova, sufix)
                    # desk, speciesParams, roundVal=4, filename=None, stri = '', saveData=False):
                    vmi.ent.print_data_summary(desk, speciesParams, roundVal=roundVal, filename=filename, stri=stri)