Esempio n. 1
0
    def __init__(self, desk):
        """Store the desktop object and create the helper objects.

        desk: desktop/parameter object; its ``dr`` and ``minmax`` attributes
              are copied onto the instance and it is handed to the graphics
              helper.
        """
        # references taken straight from the desktop
        self.desk = desk
        self.dr = desk.dr

        # helper objects: entropy file handler and general graphics
        self.entFile = bioClass.Entropy_File()
        self.gg = graphPac.GeneralGraphic(desk)

        self.minmax = desk.minmax
Esempio n. 2
0
'''
Created on Sep 14, 2015

@author: Flavio Lichtenstein
@local: Unifesp DIS - Bioinformatica

For each species, compute whether its p-value is below .05 on its JSD
distribution.
'''
import classes.BarGraphic as graphPac
gg = graphPac.GeneralGraphic()

import tests.Desktop_test as Desktop_test
import random
import classes.Drosophila as dro

# organism / gene under analysis and the sequence cutoffs
organism, gene, title = "Drosophila", "Adh", ''
cutoffLength, cutoffNumSeq = 100, 10

filename_default = 'default.ini'

# the species helper is only built for Drosophila; otherwise dr stays None
dr = dro.Drosophila() if organism == "Drosophila" else None

desk = Desktop_test.Desktop(
    filename_default, organism, gene, title, cutoffLength, cutoffNumSeq)
Esempio n. 3
0
    def save_tables_show_graph(self, cmp_file=""):
        """Compute per-species HMI statistics, then plot and/or save them.

        For every entry of ``self.list_species_name_hmi`` the HMI curve and
        its standard error (the bias-corrected arrays when
        ``self.withCorrection`` is set) are multiplied by 1000 when
        ``self.mnat`` is set and divided by ``self.numOfLetters`` when
        ``self.norm`` is set.  Mean, median, standard deviation, maximum and
        minimum of the curve are stored on ``self`` and collected per
        species.  Graphs are drawn when ``self.showGraph`` or
        ``self.saveGraph`` is set; the summary table (with ANOVA) is written
        when ``self.saveData`` is set and at least one species was processed.

        Args:
            cmp_file: suffix appended to the picture and summary file names.

        Returns:
            True.  As side effects ``self.failure`` is cleared,
            ``self.error_msg`` and ``self.unit`` are set and
            ``self.list_species_name_hmi`` is deleted.
        """
        # Local import: the final clean-up must not depend on a file-level
        # import that a bare "except" used to hide when it was missing.
        import gc

        bias_corr = self.withCorrection
        if bias_corr:
            str_correction = ' (bias corr.)'
            filename_correction = '_bias_corr'
        else:
            str_correction = ''
            filename_correction = ''

        frame = self.frame
        sFrame = '' if frame == 0 else ', frame=' + str(frame)

        if self.mnat:
            roundVal = 1
            self.unit = 'mnat'
        else:
            roundVal = 4
            self.unit = 'nat'

        print("-----------------------------------------------------------")

        speciesParams = []
        listMI_Anova = []
        hmi = None

        for iLoop, mat in enumerate(self.list_species_name_hmi, start=1):
            _, name, species, hmi, sufix = mat

            numOfSeqs = self.sampleElems
            L = self.LH0

            print("%i) %s - %s, %s"%(iLoop, name, self.minmax, "bias corr." if bias_corr else 'no corr.' ))

            sp = self.which_sp(self.organism, species)

            title = 'HMI: %s %s, %s %s\n%s%s; %i seqs; len=%i%s; #letter=%i'%\
                    (self.organism, species, self.seqType, self.gene, self.minmax, str_correction, numOfSeqs, L, sFrame, self.numOfLetters)

            # HMI mean and standard error for each k from n sequences.
            # dtype=float: the in-place division below is rejected by NumPy
            # on integer arrays.
            if bias_corr:
                arrayMIthis = np.array(hmi.arrayMIcorr, dtype=float)
                arraySE = np.array(hmi.arraySEcorr, dtype=float)
            else:
                arrayMIthis = np.array(hmi.arrayMI, dtype=float)
                arraySE = np.array(hmi.arraySE, dtype=float)

            if self.mnat:
                arrayMIthis *= 1000
                arraySE *= 1000

            if self.norm:
                arrayMIthis /= self.numOfLetters
                arraySE /= self.numOfLetters

            nPoints = len(arrayMIthis)
            if frame < 2:
                # single curve: k = 3, 4, 5, ...
                xSeq = list(range(3, nPoints + 3))
                arrayVal = arrayMIthis
                ySeqSEFinal = arraySE
            else:
                # k = 3, 6, 9, ... 3n; every value is repeated three times
                xSeq = [3 * (k + 1) for k in range(nPoints)]
                arrayVal = [val for val in arrayMIthis for _ in range(3)]
                ySeqSEFinal = [se for se in arraySE for _ in range(3)]

            self.meanY = np.round(np.mean(arrayVal), roundVal)
            self.medianY = np.round(np.median(arrayVal), roundVal)
            self.stdY = np.round(np.std(arrayVal), roundVal)
            self.maxY = np.round(np.max(arrayVal), roundVal)
            self.minY = np.round(np.min(arrayVal), roundVal)

            speciesParams.append([species, sp, numOfSeqs, self.meanY, self.medianY, self.stdY, self.maxY, self.minY])
            listMI_Anova.append(arrayVal)

            if self.showGraph or self.saveGraph:
                if frame < 2:
                    gHist = graphPac.Histogram_FreqDistribution(self, title)

                    gHist.meanY = self.meanY
                    gHist.medianY = self.medianY
                    gHist.stdY = self.stdY
                    gHist.maxY = self.maxY
                    gHist.minY = self.minY
                    gHist.desk = self

                    gHist.plot_H_MI('V', xSeq=xSeq, ySeq=arrayVal, ySE=arraySE, showError=True,
                                    unit=self.unit, roundVal=roundVal)

                    gHist.plot_H_MI('H', xSeq=xSeq, ySeq=arrayVal, ySE=ySeqSEFinal, showError=True,
                                    unit=self.unit, roundVal=roundVal)

                    gHist.densityBar("H", seq=arrayVal, unit=self.unit, roundVal=roundVal)
                else:
                    # NOTE(review): gHist is only created in the frame < 2
                    # branch, so this call raises UnboundLocalError whenever
                    # it is reached - confirm where the histogram for
                    # frames 2 and 3 is supposed to come from.
                    gHist.sameBar(xSeq=xSeq, arrayMIthis=arrayVal, linestyleCode=self.arrLinestyleCode[frame], color=self.arrColor[frame])

                if frame in (0, 3):
                    # picture name carries the sample suffix
                    pictureName = 'HMI_%s%s%s'%(sufix, filename_correction, cmp_file)
                    gHist.myPlot.print_graph(self, gHist.fig, pictureName, frame=self.tk_root, stay=True)

        # Without any processed species there is nothing to summarise (and
        # no hmi object to delegate to), so the summary is skipped.
        if self.saveData and speciesParams:
            sufix = ('%s_%s_%s_%s_frame%i_NOL%i_%iL_cutoff%i') %\
                    (self.organism, self.minmax, self.seqType, self.gene, frame, self.numOfLetters, self.cutoffLength, self.cutoffNumSeq)

            # summary file name carries the sample suffix
            filename = 'HMI_summary_%s%s%s.txt' % (sufix, filename_correction, cmp_file)
            stri = hmi.ent.calc_anova(listMI_Anova, sufix)

            hmi.ent.print_data_summary(self, speciesParams, roundVal=roundVal, filename=filename, stri=stri)

        self.failure = False
        self.error_msg = 'Task ended. All right.'

        # release the (potentially large) per-species data before returning
        del self.list_species_name_hmi
        gc.collect()

        return True
    def save_tables_show_graph(self, desk):
        """Compute per-species HMI statistics, then plot and/or save them.

        When ``desk.each_all`` is ``'each'`` only the current
        ``desk.withCorrection`` setting is processed; otherwise both the
        uncorrected and the bias-corrected curves are processed in turn
        (``desk.withCorrection`` is overwritten while doing so).  For every
        entry of ``self.list_species_name_hmi`` the curve is multiplied by
        1000 when ``desk.mnat`` is set and divided by ``desk.numOfLetters``
        when ``desk.norm`` is set; its mean, median, standard deviation,
        maximum and minimum are stored on ``self`` and collected per species.
        Graphs are drawn when ``desk.showGraph`` or ``desk.saveGraph`` is
        set; the summary table (with ANOVA) is written when ``desk.saveData``
        is set and at least one species was processed.

        Args:
            desk: desktop/parameter object holding the options, the
                per-species parameters (``dicParams``) and the Tk root.

        Returns:
            True.  ``self.failure`` is cleared and ``self.error_msg`` set.
        """
        if desk.each_all == 'each':
            bias_corr_list = [desk.withCorrection]
        else:
            bias_corr_list = [False, True]

        # positions of (#sequences, length) inside desk.dicParams[species]
        if desk.minmax == 'mincut':
            idxSeqs, idxLen = 2, 5
        else:
            idxSeqs, idxLen = 3, 6

        frame = desk.frame
        sFrame = '' if frame == 0 else ', frame=' + str(frame)
        std_rand = "" if desk.label_random == "" else " " + desk.label_random
        roundVal = 1 if desk.mnat else 4

        print("-----------------------------------------------------------")
        for bias_corr in bias_corr_list:
            if bias_corr:
                str_correction = ' (bias corr.)'
                filename_correction = '_bias_corr'
            else:
                str_correction = ''
                filename_correction = ''

            speciesParams = []
            listMI_Anova = []
            hmi = None

            for iLoop, mat in enumerate(self.list_species_name_hmi, start=1):
                _, name, species, hmi, sufix = mat

                params = desk.dicParams[species]
                numOfSeqs = params[idxSeqs]
                L = params[idxLen]

                print("%i) %s - %s, %s" %
                      (iLoop, name, desk.minmax,
                       "bias corr." if bias_corr else 'no corr.'))

                if desk.organism == 'Drosophila':
                    sp = self.dr.mnemonic(species.replace('Drosophila ', ''))
                else:
                    sp = species

                desk.withCorrection = bias_corr

                title = 'HMI: %s %s, %s %s\n%s%s%s; %i seqs; len=%i%s; #letter=%i'%\
                        (desk.organism, species, desk.seqType, desk.gene_title, desk.minmax, str_correction, std_rand, numOfSeqs, L, sFrame, desk.numOfLetters)

                # HMI mean and standard error for each k from n sequences.
                # dtype=float: the in-place division below is rejected by
                # NumPy on integer arrays.
                if bias_corr:
                    arrayMIthis = np.array(hmi.arrayMIcorr, dtype=float)
                    arraySE = np.array(hmi.arraySEcorr, dtype=float)
                else:
                    arrayMIthis = np.array(hmi.arrayMI, dtype=float)
                    arraySE = np.array(hmi.arraySE, dtype=float)

                if desk.mnat:
                    arrayMIthis *= 1000
                    arraySE *= 1000

                if desk.norm:
                    arrayMIthis /= desk.numOfLetters
                    arraySE /= desk.numOfLetters

                nPoints = len(arrayMIthis)
                if frame < 2:
                    # single curve: k = 3, 4, 5, ...
                    xSeq = list(range(3, nPoints + 3))
                    arrayVal = arrayMIthis
                    ySeqSEFinal = arraySE
                else:
                    # k = 3, 6, 9, ... 3n; every value is repeated three times
                    xSeq = [3 * (k + 1) for k in range(nPoints)]
                    arrayVal = [val for val in arrayMIthis for _ in range(3)]
                    ySeqSEFinal = [se for se in arraySE for _ in range(3)]

                self.meanY = np.round(np.mean(arrayVal), roundVal)
                self.medianY = np.round(np.median(arrayVal), roundVal)
                self.stdY = np.round(np.std(arrayVal), roundVal)
                self.maxY = np.round(np.max(arrayVal), roundVal)
                self.minY = np.round(np.min(arrayVal), roundVal)

                speciesParams.append([
                    species, sp, numOfSeqs, self.meanY, self.medianY,
                    self.stdY, self.maxY, self.minY
                ])
                listMI_Anova.append(arrayVal)

                if desk.showGraph or desk.saveGraph:
                    if frame < 2:
                        gHist = graphPac.Histogram_FreqDistribution(
                            desk, title)

                        gHist.meanY = self.meanY
                        gHist.medianY = self.medianY
                        gHist.stdY = self.stdY
                        gHist.maxY = self.maxY
                        gHist.minY = self.minY

                        gHist.plot_H_MI('V',
                                        xSeq=xSeq,
                                        ySeq=arrayVal,
                                        ySE=arraySE,
                                        showError=True)

                        gHist.plot_H_MI('H',
                                        xSeq=xSeq,
                                        ySeq=arrayVal,
                                        ySE=ySeqSEFinal,
                                        showError=True)

                        gHist.densityBar("H", seq=arrayVal)
                    else:
                        # NOTE(review): gHist is only created in the
                        # frame < 2 branch, so this call raises
                        # UnboundLocalError whenever it is reached - confirm
                        # where the histogram for frames 2 and 3 should
                        # come from.
                        gHist.sameBar(
                            xSeq=xSeq,
                            arrayMIthis=arrayVal,
                            linestyleCode=self.arrLinestyleCode[frame],
                            color=self.arrColor[frame])

                    if frame in (0, 3):
                        pictureName = 'HMI_%s%s' % (sufix, filename_correction)
                        gHist.myPlot.print_graph(desk,
                                                 gHist.fig,
                                                 pictureName,
                                                 frame=desk.tk_root)

            # Without any processed species there is nothing to summarise
            # (and no hmi object to delegate to), so the summary is skipped.
            if desk.saveData and speciesParams:
                sufix = ('%s_%s_%s_%s_frame%i_NOL%i_%iL_cutoff%i') %\
                        (desk.organism, desk.minmax, desk.seqType, desk.gene_title, frame, desk.numOfLetters, desk.cutoffLength, desk.cutoffNumSeq)

                stri_rand = "" if desk.label_random == "" else desk.stri_random

                filename = 'HMI_summary_%s%s%s.txt' % (
                    sufix, filename_correction, stri_rand)
                stri = hmi.ent.calc_anova(listMI_Anova, sufix)

                hmi.ent.print_data_summary(desk,
                                           speciesParams,
                                           roundVal=roundVal,
                                           filename=filename,
                                           stri=stri)

        self.failure = False
        self.error_msg = 'Task ended. All right.'
        return True
Esempio n. 5
0
    def save_tables_show_graph(self, desk, which_module):
        """Compute per-species vertical entropy / VMI statistics, plot and save.

        When ``desk.each_all`` is ``'each'`` only ``which_module`` with the
        current ``desk.withCorrection`` setting is processed; otherwise both
        modules (``'Entropy'`` and ``'VMI'``) are processed, each without and
        with bias correction (``desk.withCorrection`` is overwritten while
        doing so).  For every entry of ``self.list_species_name_vmi`` the
        values are divided by ``desk.numOfLetters`` when ``desk.norm`` is set
        and multiplied by 1000 when ``desk.mnat`` is set.  Species whose
        values never exceed zero are reported and skipped.  Mean, median,
        standard deviation, maximum and minimum are stored on ``self`` and
        collected per species.  Graphs are drawn when ``desk.showGraph`` or
        ``desk.saveGraph`` is set; the summary table (with ANOVA) is written
        when ``desk.saveData`` is set and the species list is not empty.

        Args:
            desk: desktop/parameter object holding the options, the
                per-species parameters (``dicParams``) and the Tk root.
            which_module: ``'Entropy'`` or anything else for VMI; only used
                when ``desk.each_all == 'each'``.

        Returns:
            None.
        """
        if desk.each_all == 'each':
            list_modules = [which_module]
            bias_corr_list = [desk.withCorrection]
        else:
            list_modules = ['Entropy', 'VMI']
            bias_corr_list = [False, True]

        # positions of (#sequences, length) inside desk.dicParams[species]
        if desk.minmax == 'mincut':
            idxSeqs, idxLen = 2, 5
        else:
            idxSeqs, idxLen = 3, 6

        std_rand = "" if (desk.label_random == "") else " " + desk.label_random
        roundVal = 2 if desk.mnat else 4

        print("-----------------------------------------------------------")
        for module in list_modules:
            is_entropy = module == 'Entropy'
            which_symb = 'VHShannon' if is_entropy else 'VMI'

            for bias_corr in bias_corr_list:
                if bias_corr:
                    str_correction = ' (bias corr.)'
                    filename_correction = '_bias_corr'
                else:
                    str_correction = ''
                    filename_correction = ''

                listMI_Anova = []
                speciesParams = []
                vmi = None

                for iLoop, mat in enumerate(self.list_species_name_vmi, start=1):
                    _, name, species, vmi, sufix = mat

                    params = desk.dicParams[species]
                    numOfSeqs = params[idxSeqs]
                    L = params[idxLen]

                    print("%i) %s - %s %s %s"%(iLoop, name, desk.minmax, "bias corr." if bias_corr else 'no corr.', module ))

                    if desk.organism == 'Drosophila':
                        sp = self.dr.mnemonic(species.replace('Drosophila ',''))
                    else:
                        sp = species

                    desk.withCorrection = bias_corr

                    titleArgs = (desk.organism, species, desk.seqType, desk.gene_title, desk.minmax,
                                 str_correction, std_rand, numOfSeqs, L, desk.numOfLetters)

                    if is_entropy:
                        title = 'Entropy Distribution %s %s, %s %s\n%s%s%s; %i seqs; len=%i; #letter=%i' % titleArgs
                        if bias_corr:
                            values, errors = vmi.HShannonCorrList, vmi.SeHShannonCorrList
                        else:
                            values, errors = vmi.HShannonList, vmi.SeHShannonList
                    else:
                        title = 'VMI Heat Map %s %s, %s %s %s%s%s\n%i seqs; len=%i; #letter=%i' % titleArgs
                        if bias_corr:
                            values, errors = vmi.MIcorrList, vmi.SeMICorrList
                        else:
                            values, errors = vmi.MIlist, vmi.SeMIList

                    # dtype=float: the in-place division below is rejected
                    # by NumPy on integer arrays.
                    arrayVal = np.array(values, dtype=float)
                    arraySE = np.array(errors, dtype=float)

                    # normalization dividing by numOfLetters
                    if desk.norm:
                        arrayVal /= desk.numOfLetters
                        arraySE /= desk.numOfLetters

                    # milli nats
                    if desk.mnat:
                        arrayVal *= 1000
                        arraySE *= 1000

                    # first position holding the largest strictly positive
                    # value; stays None when no value exceeds zero
                    maxMI = 0
                    maxiPos = None
                    for pos, value in enumerate(arrayVal):
                        if value > maxMI:
                            maxMI = value
                            i, j = vmi.ijList[pos]
                            maxiPos = [i, j, maxMI, arraySE[pos]]

                    if maxiPos is None:
                        stri = '### Species %s has MI = ZERO. Too conserved data sequences. Impossible to include in analysis.'%(species)
                        print(stri)
                        continue

                    xSeq = list(range(len(arrayVal)))

                    self.meanY = np.round(np.mean(arrayVal), roundVal)
                    self.medianY = np.round(np.median(arrayVal), roundVal)
                    self.stdY = np.round(np.std(arrayVal), roundVal)
                    self.maxY = np.round(np.max(arrayVal), roundVal)
                    self.minY = np.round(np.min(arrayVal), roundVal)

                    speciesParams.append([species, sp, numOfSeqs, self.meanY, self.medianY, self.stdY, self.maxY, self.minY])

                    if desk.saveData:
                        listMI_Anova.append(arrayVal)

                    if not (desk.showGraph or desk.saveGraph):
                        continue

                    if is_entropy:
                        gHist = graphPac.Histogram_FreqDistribution(desk, title)

                        gHist.meanY = self.meanY
                        gHist.medianY = self.medianY
                        gHist.stdY = self.stdY
                        gHist.maxY = self.maxY
                        gHist.minY = self.minY

                        gHist.plot_H_MI('V', xSeq=xSeq, ySeq=arrayVal, ySE=arraySE, showError=True)
                        gHist.densityBar('V', seq=arrayVal)

                        pictureName = 'VHShannon_%s%s%s'%(sufix, filename_correction, desk.stri_random)
                        gHist.myPlot.print_graph(desk, gHist.fig, pictureName, frame=desk.tk_root)
                    else:
                        # 3D plots have no ceiling; 2D heat maps may share a
                        # common ceiling so that their colour scales agree
                        limSup = self.maxY
                        if not desk.is3D:
                            ceiling = desk.heatmap_ceil_value
                            if desk.heatmap_ceil and self.maxY <= ceiling:
                                limSup = ceiling

                        gMI = vmi.plotHeatMap(desk, desk.is3D, arrayVal, vmi.ijList, L, maxiPos, title, species=species, limSup=limSup, roundVal=roundVal, str_correction=str_correction)

                        if desk.is3D:
                            pictureName = 'HeatMap_3D_VMI_%s%s%s'%(sufix, filename_correction,desk.stri_random)
                        else:
                            gMI.densityHeatmapBar(arrayVal, limSup)
                            pictureName = 'HeatMap_2D_VMI_%s%s%s'%(sufix, filename_correction,desk.stri_random)

                        gMI.myPlot.print_graph(desk, gMI.fig, pictureName, frame=desk.tk_root)

                # With an empty species list there is no vmi object to
                # delegate to, so the summary is skipped.
                if desk.saveData and vmi is not None:
                    sufix = ('%s_%s_%s_%s_NOL%i_%iL_cutoff%i') %\
                            (desk.organism, desk.minmax, desk.seqType, desk.gene_title, desk.numOfLetters, desk.cutoffLength, desk.cutoffNumSeq)

                    filename = '%s_summary_%s%s%s.txt' % (which_symb, sufix, filename_correction, desk.stri_random)
                    stri = vmi.ent.calc_anova(listMI_Anova, sufix)
                    vmi.ent.print_data_summary(desk, speciesParams, roundVal=roundVal, filename=filename, stri=stri)
                            
Esempio n. 6
0
    def looping(self, desk, opt):
        """Plot the hierarchical-cluster dendrogram of a JSD distance matrix.

        The distance matrix is read from
        ``desk.rootTable + desk.cluster_input_filename`` (the file name is
        set by ``desk.set_cluster_filenames()``), multiplied by
        ``desk.factor``, clustered with ``linkage`` and drawn with
        ``dendrogram``; the figure is then handed to
        ``graphPac.Plot.print_graph``.

        Args:
            desk: desktop/parameter object.  Its ``withCorrection``,
                ``minmax``, ``colorThreshold``, ``unit``, ``factor`` and
                ``roundVal`` attributes are overwritten here.
            opt: indexable whose first two items are the bias-correction
                flag and the ``minmax`` mode.

        Returns:
            False, with ``self.error_msg`` set, when the distance-matrix file
            cannot be opened; True otherwise.
        """
        plt.close("all")
        plt.clf()

        desk.withCorrection = opt[0]
        desk.minmax = opt[1]

        if desk.withCorrection:
            self.str_correction = '-bias corr.'
            self.filename_correction = '_bias_corr'
        else:
            self.str_correction = ''
            self.filename_correction = ''

        print("\n--->>>", desk.minmax, self.str_correction)

        desk.colorThreshold = desk.colorThreshold_var.get()

        if desk.mnat:
            desk.unit = 'mnat'
            desk.factor = 1000
            desk.roundVal = 2
        else:
            desk.unit = 'nat'
            desk.factor = 1
            desk.roundVal = 4

        headerArgs = (desk.organism, desk.seqType, desk.gene)
        prefixArgs = (desk.minmax, self.str_correction)
        cutoffArgs = (desk.numOfLetters, desk.cutoffLength, desk.cutoffNumSeq)

        if desk.vert_horiz == 'HMI':
            xLabel = 'JSD(HMI) (%s)' % (desk.unit)

            title = "Hierarchical Cluster Method=%s of JSD(HMI)- %s %s %s" \
                % ((desk.cluster_method_desc,) + headerArgs)

            if desk.frame > 0:
                # the label reads "frame", not the attribute path "desk.frame"
                title += '\nJSD(HMI) %s%s, frame %i, #letter %i, min(L)=%i, min(#seqs)=%i' % \
                    (prefixArgs + (desk.frame,) + cutoffArgs)
            else:
                title += '\n%s%s, letter %i, min(L)=%i, min(#seqs)=%i' % \
                    (prefixArgs + cutoffArgs)
        else:
            # multidimensional distance: VMI, or VSH for any other value
            kind = 'VMI' if desk.vert_horiz == 'VMI' else 'VSH'

            # The matrix is scaled by desk.factor for every kind, so the axis
            # label always carries desk.unit.
            xLabel = 'JSD(%s) (%s)' % (kind, desk.unit)

            title = "Hierarchical Cluster Method=%s of JSD(%s), %s %s %s" \
                % ((desk.cluster_method_desc, kind) + headerArgs)

            title += '\n%s%s, #letter %i, min(L)=%i, min(#seqs)=%i' % \
                (prefixArgs + cutoffArgs)

        desk.set_cluster_filenames()
        filename = desk.cluster_input_filename

        ret, _, colHeaders, dataMatrix = self.open_distance_matrix_file(
            desk.rootTable + filename)
        if not ret:
            self.error_msg = 'Could not find %s' % (desk.rootTable + filename)
            return False

        pictureName = 'Cluster_' + filename.replace('.txt', '')

        # desk.dr is defined in pipe_desktop get_params()
        if desk.dr:
            rows = desk.dr.labels(colHeaders)
        else:
            rows = colHeaders

        # convert the native python array into a numpy array, in display units
        dataMatrix = np.array(dataMatrix)
        if desk.factor != 1:
            dataMatrix *= desk.factor

        # single, weighted, average, complete
        linkageMatrix = linkage(dataMatrix,
                                method=desk.cluster_method,
                                metric='euclidean')

        # column 2 holds the merge distances; they are halved before drawing
        for row in linkageMatrix:
            row[2] = round(row[2] * .5, desk.roundVal)

        fig = plt.figure(1, dpi=desk.dpi)
        # integer position code: Matplotlib rejects the string form '111'
        ax = fig.add_subplot(111)

        plt.subplots_adjust(bottom=.1, left=.05, right=.84)

        yLabel = 'species'

        plt.rcParams['lines.linewidth'] = 2.5
        fontsize = 26

        plt.title(title, fontsize=fontsize)
        ax.set_xlabel(xLabel, fontsize=fontsize)
        ax.set_ylabel(yLabel, fontsize=fontsize)

        # make tick labels bigger
        leaf_font_size = 28

        # best effort: a failed dendrogram still lets the (empty) figure be
        # shown/saved, but the reason is reported instead of being swallowed
        try:
            dendrogram(
                linkageMatrix,
                color_threshold=desk.colorThreshold,
                labels=rows,
                orientation='right'
            )
        except Exception as exc:
            print("Failed in printing dendrogram")
            print(repr(exc))

        plt.xticks(fontsize=leaf_font_size)
        plt.yticks(fontsize=leaf_font_size)

        self.myPlot = graphPac.Plot()
        self.myPlot.print_graph(desk,
                                fig,
                                pictureName=pictureName,
                                frame=desk.tk_root,
                                stay=True)

        return True
Esempio n. 7
0
    def __init__(self, desk):
        """Summarize and plot the Mr.Bayes parameters selected on ``desk``.

        Steps, in order:

        1. ``desk.get_params()`` is called, then a ``mb.mrBayesClass`` helper
           reads the run files and the statistic files.
        2. The list of parameters is built from the ``*_var`` toggles of
           ``desk`` (each toggle is queried with ``.get()``).
        3. For every selected parameter one matplotlib figure is created.
           Every study (``min_max-alig_cons-std_covar``) that has data gets a
           summary entry in ``mbs.summary[par][study]`` and a distribution
           plot; when a (min_max, alig_cons) pair yields exactly two data
           sets, comparison plots are added as well.
        4. The figure is handed to ``graphPac.Plot.print_graph`` and, when
           ``desk.saveData`` is set, the parameters are saved.

        Args:
            desk: application object providing the parameter toggles,
                ``colors``, ``organism``, ``gene_title``, ``tk_root`` and
                ``saveData`` (all read below).

        Result attributes:
            self.failure: ``True`` until the whole analysis completed.
            self.error_msg: reason for an early return, ``''`` otherwise.
            self.myPlot: the ``graphPac.Plot`` used to emit the figures.
        """

        self.desk = desk
        self.failure = True
        self.error_msg = ''

        try:
            desk.get_params()
        except Exception:
            self.error_msg = 'Could not get parameters.'
            return

        mbs = mb.mrBayesClass(desk)

        if not mbs.read_runPs():
            self.error_msg = 'Problems reading Mr.Bayes files'
            return

        if not mbs.stat_files_to_dic():
            self.error_msg = 'Could not read Mr.Bayes statistic files'
            return

        mbs.summary = {}

        # (toggle attribute on desk, parameter label), in display order.
        # The labels are used as keys into the parsed Mr.Bayes tables, so they
        # must be spelled exactly (the original "r(G<->T" lacked its ")").
        selectable = (
            ("piA_var", "pi(A)"),
            ("piC_var", "pi(C)"),
            ("piG_var", "pi(G)"),
            ("piT_var", "pi(T)"),
            ("rAC_var", "r(A<->C)"),
            ("rAG_var", "r(A<->G)"),
            ("rAT_var", "r(A<->T)"),
            ("rCG_var", "r(C<->G)"),
            ("rCT_var", "r(C<->T)"),
            ("rGT_var", "r(G<->T)"),
            ("LnL_var", "LnL"),
            ("LnPr_var", "LnPr"),
            ("TL_var", "TL"),
            ("alpha_var", "alpha"),
            ("off_on_var", "s(off->on)"),
            ("on_off_var", "s(on->off)"),
            ("pinvar_var", "pinvar"),
        )
        params = [label for attr, label in selectable
                  if getattr(desk, attr).get()]

        if not params:
            self.error_msg = 'Define at least one param.'
            return

        # geometry of the subplot grid shared by all figures
        numCols = 46
        numLines = 16
        self.myPlot = graphPac.Plot()

        for iPar, par in enumerate(params, 1):
            mbs.summary[par] = {}

            fig = plt.figure(iPar)
            plt.subplots_adjust(left=0.05, right=0.95, bottom=0.05, top=0.90)

            # NOTE(review): wm_geometry presumably requires a Tk-based
            # matplotlib backend - confirm before switching backends.
            mng = plt.get_current_fig_manager()
            mng.window.wm_geometry("1400x900+50+50")

            seqXs = []

            # ---- global value range of this parameter over all studies ----
            mini, maxi = float('inf'), -float('inf')

            for min_max in mbs.min_maxs:
                for alig_cons in mbs.alig_conss:
                    for std_covar in mbs.std_covars:
                        study = min_max + '-' + alig_cons + '-' + std_covar

                        try:
                            vals = np.array(mbs.dic_detaild_params[study][par])
                            lowest, highest = np.min(vals), np.max(vals)
                        except Exception:
                            # study/parameter not loaded, or no samples
                            continue

                        # Both bounds are updated independently: with the
                        # former if/elif a data set that lowered the minimum
                        # never contributed its maximum.
                        if lowest < mini:
                            mini = lowest
                        if highest > maxi:
                            maxi = highest

            # every parameter except the log-likelihood/log-prior is plotted
            # from zero ("alpha" always, the others when the minimum is
            # negative)
            if par != "LnL" and par != "LnPr":
                if par == "alpha" or mini < 0:
                    mini = 0

            # the pi(*) parameters are capped at 1
            if par in ["pi(A)", "pi(C)", "pi(G)", "pi(T)"]:
                if maxi > 1:
                    maxi = 1

            div = 5
            delta = (maxi - mini) / float(div - 1)
            ticks = [round(mini + delta * i, 2) for i in range(div)]
            ticks0 = [0] * div

            # ---- per-study statistics and plots ----
            cntCols = 0
            stri_summary = "   \tAligned\tConsensus\n"
            for min_max in mbs.min_maxs:
                for alig_cons in mbs.alig_conss:

                    listData = []
                    cntLine = 0
                    # Reflects the LAST std_covar examined (as before); the
                    # initial value only prevents a NameError when
                    # mbs.std_covars is empty.
                    didFind = False

                    for std_covar in mbs.std_covars:
                        try:
                            _ = mbs.mb_filenames[min_max][alig_cons][std_covar]
                            didFind = True
                        except Exception:
                            didFind = False
                            continue

                        study = min_max + '-' + alig_cons + '-' + std_covar
                        mbs.summary[par][study] = {}

                        try:
                            vals = np.array(mbs.dic_detaild_params[study][par])
                        except Exception:
                            # expected when the file was not found or could
                            # not be read: the study has no detailed params
                            continue

                        try:
                            summary = mbs.summary[par][study]
                            N = len(vals)
                            seqXs.append(np.array(vals))

                            (mu, sigma, hmu, hsigma,
                             q025, _, q2, _, q975) = mbs.stat_ppf_decimal(
                                 vals, par)

                            summary["N"] = N
                            summary["mu"] = mu
                            summary["sigma"] = sigma
                            summary["SE"] = sigma / np.sqrt(N)
                            summary["median"] = q2
                            summary["q025"] = q025
                            summary["q975"] = q975

                            if hmu:
                                summary["hmu"] = hmu
                                summary["hsigma"] = hsigma
                                summary["hSE"] = hsigma / np.sqrt(N)

                            summary["min"] = np.min(vals)
                            summary["max"] = np.max(vals)

                            # LnL doesn't have ESS and PSRF - adopted from TL:
                            # total tree length (the sum of all branch
                            # lengths, TL)
                            stat_par = "TL" if par == "LnL" else par

                            # Missing diagnostics fall back to values that
                            # trip the warning markers below.
                            try:
                                summary["avgESS"] = mbs.dic_pstat[study][
                                    stat_par]['avgESS']
                            except Exception:
                                summary["avgESS"] = 0

                            try:
                                summary["PSRF"] = mbs.dic_pstat[study][
                                    stat_par]['PSRF']
                            except Exception:
                                summary["PSRF"] = 10000

                            sAvgESS = "avgESS=%3.1f" % summary["avgESS"]
                            if summary["avgESS"] < 90:
                                sAvgESS += " **"

                            sPSRF = " PSRF=%1.2f" % summary["PSRF"]
                            if (summary["PSRF"] - 1) > .1:
                                sPSRF += " ***"

                            # first data set of the pair goes to the top row,
                            # any later one to the row starting at line 6
                            row = 0 if cntLine == 0 else 6
                            ax = plt.subplot2grid((numLines, numCols),
                                                  (row, cntCols),
                                                  rowspan=4,
                                                  colspan=10)

                            if par == "LnL":
                                print("%s %s %s %s %s %s" %
                                      (par, study, mu, sigma, hmu, hsigma))
                                stri_summary = mbs.show_lnl(
                                    desk, plt, ax, study, sAvgESS, sPSRF,
                                    cntLine, cntCols, min_max, alig_cons,
                                    stri_summary)
                            else:
                                print("%s %s %s %s" % (par, study, mu, sigma))
                                mbs.show_distrib(plt, ax, par, study, sAvgESS,
                                                 sPSRF, ticks, desk.colors,
                                                 iPar, mini, maxi, cntCols)

                            listData.append(vals)
                            cntLine += 1
                        except Exception as err:
                            # best effort: one broken study must not abort the
                            # whole figure, but the failure is now reported
                            # instead of being dropped silently
                            print("Skipping study %s for %s: %s" %
                                  (study, par, err))

                    if not didFind:
                        continue

                    # comparison plots need exactly two data sets
                    if len(listData) == 2:
                        if par == "LnL":
                            mbs.show_LRT(alig_cons, par, plt, numLines,
                                         numCols, cntCols)
                        else:
                            mbs.show_conf_interval(min_max, alig_cons, par,
                                                   plt, numLines, numCols,
                                                   cntCols, ticks, ticks0,
                                                   mini, maxi, listData)

                        mbs.show_qqplot2(min_max, alig_cons, par, plt,
                                         numLines, numCols, cntCols,
                                         listData)

                    cntCols += 12

            # ---- figure title: parameter name plus ANOVA verdict ----
            if par == "LnL":
                stri = ""
            else:
                f_value, p_value = mbs.calc_anova(seqXs)
                if p_value <= 0.05:
                    stri = ', at least one distribution is statistically different.'
                else:
                    stri = ', the distributions are statistically similar.'

                stri += 'ANOVA: f-value %2.3e   p_value %2.3e' % (f_value,
                                                                  p_value)

            left = .05
            top = .97
            fig.text(left, top, par + stri, color="red")

            stri = mbs.ttest_summary(par)
            print(stri)

            # "<" and ">" are stripped from the label before it is used in
            # the picture name
            sPar = par.replace(">", "").replace("<", "")
            pictureName = "%s_%s_mr_bayes_analisis_param_%s" % (
                desk.organism, desk.gene_title, sPar)
            self.myPlot.print_graph(self.desk,
                                    fig,
                                    pictureName,
                                    frame=self.desk.tk_root,
                                    stay=False)

        print(stri_summary)

        if desk.saveData:
            mbs.save_params(params)

        self.failure = False
        return