def __init__(self, desk):
    '''
    Keep a reference to the desktop object and create the helper objects.

    desk: desktop/parameter object; its ``dr`` and ``minmax`` attributes are
          copied onto this instance and it is handed to GeneralGraphic.
    '''
    self.desk = desk
    self.dr = desk.dr

    # helpers: entropy file handler and the general graphic bound to desk
    entropy_file = bioClass.Entropy_File()
    general_graphic = graphPac.GeneralGraphic(desk)
    self.entFile = entropy_file
    self.gg = general_graphic

    self.minmax = desk.minmax
'''
Created on Sep 14, 2015

@author: Flavio Lichtenstein
@local: Unifesp DIS - Bioinformatica

For each species, check whether its p-value is < .05 on its JSD distribution.
'''
import classes.BarGraphic as graphPac
# created right after its import, before the remaining modules are loaded
gg = graphPac.GeneralGraphic()

import tests.Desktop_test as Desktop_test
import random
import classes.Drosophila as dro

# fixed configuration of this run
organism, gene, title = "Drosophila", "Adh", ''
cutoffLength, cutoffNumSeq = 100, 10
filename_default = 'default.ini'

# the species helper is only built for Drosophila
dr = dro.Drosophila() if organism == "Drosophila" else None

desk = Desktop_test.Desktop(filename_default, organism, gene, title,
                            cutoffLength, cutoffNumSeq)
def save_tables_show_graph(self,cmp_file=""):
    '''
    Summarize the HMI curve of every species in self.list_species_name_hmi and,
    depending on the flags, plot it and save a summary table.

    For each species record (filename, name, species, hmi, sufix) the method:
      - picks the plain or bias-corrected MI/SE arrays from ``hmi``;
      - scales them by 1000 when self.mnat is set and divides them by
        self.numOfLetters when self.norm is set;
      - stores mean/median/std/max/min in self.meanY ... self.minY and
        appends them to the per-run summary list;
      - plots through graphPac when self.showGraph or self.saveGraph is set.
    After the species loop, when self.saveData is set, the ANOVA text and the
    summary are written through ``hmi.ent``.

    cmp_file: text appended to the picture name and to the summary filename.

    Side effects: rebinds self.withCorrection and self.unit, sets self.failure
    and self.error_msg, and deletes self.list_species_name_hmi at the end.

    Returns True.
    '''
    bias_corr_list = [self.withCorrection]

    print("-----------------------------------------------------------")
    for bias_corr in bias_corr_list:
        speciesParams = []
        listMI_Anova = []
        # stays None when there is no species, so the save step can be skipped
        # instead of failing on names that were never bound
        hmi = None

        for iLoop, record in enumerate(self.list_species_name_hmi, start=1):
            _, name, species, hmi, sufix = record

            numOfSeqs = self.sampleElems
            L = self.LH0

            print("%i) %s - %s, %s"%(iLoop, name, self.minmax, "bias corr." if bias_corr else 'no corr.' ))

            sp = self.which_sp(self.organism, species)

            self.withCorrection = bias_corr
            if self.withCorrection:
                str_correction = ' (bias corr.)'
                filename_correction = '_bias_corr'
            else:
                str_correction = ''
                filename_correction = ''

            sFrame = '' if self.frame == 0 else ', frame=' + str(self.frame)

            title = 'HMI: %s %s, %s %s\n%s%s; %i seqs; len=%i%s; #letter=%i'%\
                    (self.organism, species, self.seqType, self.gene, self.minmax,
                     str_correction, numOfSeqs, L, sFrame, self.numOfLetters)

            # HMI mean and standard error for each k; forced to float so the
            # in-place scaling below also works when the inputs are integers
            if self.withCorrection:
                arrayMIthis = np.array(hmi.arrayMIcorr, dtype=float)
                arraySE = np.array(hmi.arraySEcorr, dtype=float)
            else:
                arrayMIthis = np.array(hmi.arrayMI, dtype=float)
                arraySE = np.array(hmi.arraySE, dtype=float)

            if self.mnat:
                arrayMIthis *= 1000
                arraySE *= 1000
                roundVal = 1
                self.unit = 'mnat'
            else:
                roundVal = 4
                self.unit = 'nat'

            if self.norm:
                arrayMIthis /= self.numOfLetters
                arraySE /= self.numOfLetters

            numK = len(arrayMIthis)
            if self.frame < 2:
                # only one curve: k = 3, 4, 5, ...
                xSeq = list(range(3, numK + 3))
                arrayVal = arrayMIthis
                ySeqSEFinal = arraySE
            else:
                # framed curves: k = 3, 6, 9, ... 3n and every value is
                # repeated three times
                xSeq = list(range(3, 3 * numK + 1, 3))
                arrayVal = [val for val in arrayMIthis for _ in range(3)]
                ySeqSEFinal = [se for se in arraySE[:numK] for _ in range(3)]

            self.meanY = np.round(np.mean(arrayVal), roundVal)
            self.medianY = np.round(np.median(arrayVal), roundVal)
            self.stdY = np.round(np.std(arrayVal), roundVal)
            self.maxY = np.round(np.max(arrayVal), roundVal)
            self.minY = np.round(np.min(arrayVal), roundVal)

            speciesParams.append([species,sp,numOfSeqs, self.meanY, self.medianY, self.stdY, self.maxY, self.minY])
            listMI_Anova.append(arrayVal)

            if self.showGraph or self.saveGraph:
                if self.frame < 2:
                    gHist = graphPac.Histogram_FreqDistribution(self, title)

                    gHist.meanY = self.meanY
                    gHist.medianY = self.medianY
                    gHist.stdY = self.stdY
                    gHist.maxY = self.maxY
                    gHist.minY = self.minY
                    gHist.desk = self

                    gHist.plot_H_MI('V', xSeq=xSeq, ySeq=arrayVal, ySE=arraySE, showError=True, \
                                    unit=self.unit, roundVal=roundVal)
                    gHist.plot_H_MI('H', xSeq=xSeq, ySeq=arrayVal, ySE=ySeqSEFinal, showError=True, \
                                    unit=self.unit, roundVal=roundVal)
                    gHist.densityBar("H", seq=arrayVal, unit=self.unit, roundVal = roundVal)
                else:
                    # NOTE(review): gHist is only created in the frame < 2
                    # branch of this method; with frame >= 2 this line fails
                    # unless a histogram was built earlier in the same call -
                    # confirm the intended flow.
                    gHist.sameBar(xSeq=xSeq, arrayMIthis=arrayVal,linestyleCode=self.arrLinestyleCode[self.frame], color=self.arrColor[self.frame])

                if (self.frame == 0) or (self.frame==3):
                    # sample in filename
                    pictureName = 'HMI_%s%s%s'%(sufix, filename_correction, cmp_file)
                    gHist.myPlot.print_graph(self, gHist.fig, pictureName, frame=self.tk_root, stay=True)

        if self.saveData and hmi is not None:
            sufix = ('%s_%s_%s_%s_frame%i_NOL%i_%iL_cutoff%i') %\
                    (self.organism, self.minmax, self.seqType, self.gene, self.frame,
                     self.numOfLetters, self.cutoffLength, self.cutoffNumSeq)

            # summary with sample in filename
            filename = 'HMI_summary_%s%s%s.txt' % (sufix, filename_correction, cmp_file)
            stri = hmi.ent.calc_anova(listMI_Anova, sufix)
            hmi.ent.print_data_summary(self, speciesParams, roundVal=roundVal, filename=filename, stri=stri)

    self.failure = False
    self.error_msg = 'Task ended. All right.'

    # at the end of save_tables_show_graph release the species list
    del self.list_species_name_hmi
    gc.collect()

    return True
def save_tables_show_graph(self, desk):
    '''
    Summarize the HMI curve of every species in self.list_species_name_hmi and,
    depending on the desk flags, plot it and save a summary table.

    desk: desktop/parameter object. When desk.each_all is 'each' only the
          current desk.withCorrection setting is processed, otherwise both
          the uncorrected and the bias-corrected data are processed.
          desk.withCorrection is rebound on every pass.

    For each species record (filename, name, species, hmi, sufix) the number
    of sequences and the length are read from desk.dicParams[species]
    (columns 2/5 for 'mincut', 3/6 otherwise); the MI/SE arrays are scaled by
    1000 when desk.mnat is set and divided by desk.numOfLetters when
    desk.norm is set; mean/median/std/max/min are stored in
    self.meanY ... self.minY. After the species loop, when desk.saveData is
    set, the ANOVA text and the summary are written through ``hmi.ent``.

    Sets self.failure and self.error_msg; returns True.
    '''
    if desk.each_all == 'each':
        bias_corr_list = [desk.withCorrection]
    else:
        bias_corr_list = [False, True]

    print("-----------------------------------------------------------")
    for bias_corr in bias_corr_list:
        speciesParams = []
        listMI_Anova = []
        # stays None when there is no species, so the save step can be skipped
        # instead of failing on names that were never bound
        hmi = None

        for iLoop, record in enumerate(self.list_species_name_hmi, start=1):
            _, name, species, hmi, sufix = record

            row = desk.dicParams[species]
            if desk.minmax == 'mincut':
                numOfSeqs = row[2]
                L = row[5]
            else:
                numOfSeqs = row[3]
                L = row[6]

            print("%i) %s - %s, %s" % (iLoop, name, desk.minmax, "bias corr." if bias_corr else 'no corr.'))

            if desk.organism == 'Drosophila':
                sp = self.dr.mnemonic(species.replace('Drosophila ', ''))
            else:
                sp = species

            desk.withCorrection = bias_corr
            if desk.withCorrection:
                str_correction = ' (bias corr.)'
                filename_correction = '_bias_corr'
            else:
                str_correction = ''
                filename_correction = ''

            sFrame = '' if desk.frame == 0 else ', frame=' + str(desk.frame)
            std_rand = "" if (desk.label_random == "") else " " + desk.label_random

            title = 'HMI: %s %s, %s %s\n%s%s%s; %i seqs; len=%i%s; #letter=%i'%\
                    (desk.organism, species, desk.seqType, desk.gene_title, desk.minmax,
                     str_correction, std_rand, numOfSeqs, L, sFrame, desk.numOfLetters)

            # HMI mean and standard error for each k; forced to float so the
            # in-place scaling below also works when the inputs are integers
            if desk.withCorrection:
                arrayMIthis = np.array(hmi.arrayMIcorr, dtype=float)
                arraySE = np.array(hmi.arraySEcorr, dtype=float)
            else:
                arrayMIthis = np.array(hmi.arrayMI, dtype=float)
                arraySE = np.array(hmi.arraySE, dtype=float)

            if desk.mnat:
                arrayMIthis *= 1000
                arraySE *= 1000
                roundVal = 1
            else:
                roundVal = 4

            if desk.norm:
                arrayMIthis /= desk.numOfLetters
                arraySE /= desk.numOfLetters

            numK = len(arrayMIthis)
            if desk.frame < 2:
                # only one curve: k = 3, 4, 5, ...
                xSeq = list(range(3, numK + 3))
                arrayVal = arrayMIthis
                ySeqSEFinal = arraySE
            else:
                # framed curves: k = 3, 6, 9, ... 3n and every value is
                # repeated three times
                xSeq = list(range(3, 3 * numK + 1, 3))
                arrayVal = [val for val in arrayMIthis for _ in range(3)]
                ySeqSEFinal = [se for se in arraySE[:numK] for _ in range(3)]

            self.meanY = np.round(np.mean(arrayVal), roundVal)
            self.medianY = np.round(np.median(arrayVal), roundVal)
            self.stdY = np.round(np.std(arrayVal), roundVal)
            self.maxY = np.round(np.max(arrayVal), roundVal)
            self.minY = np.round(np.min(arrayVal), roundVal)

            speciesParams.append([
                species, sp, numOfSeqs, self.meanY, self.medianY, self.stdY,
                self.maxY, self.minY
            ])
            listMI_Anova.append(arrayVal)

            if desk.showGraph or desk.saveGraph:
                if desk.frame < 2:
                    gHist = graphPac.Histogram_FreqDistribution(desk, title)

                    gHist.meanY = self.meanY
                    gHist.medianY = self.medianY
                    gHist.stdY = self.stdY
                    gHist.maxY = self.maxY
                    gHist.minY = self.minY

                    gHist.plot_H_MI('V', xSeq=xSeq, ySeq=arrayVal, ySE=arraySE, showError=True)
                    gHist.plot_H_MI('H', xSeq=xSeq, ySeq=arrayVal, ySE=ySeqSEFinal, showError=True)
                    gHist.densityBar("H", seq=arrayVal)
                else:
                    # NOTE(review): gHist is only created in the frame < 2
                    # branch of this method; with frame >= 2 this line fails
                    # unless a histogram was built earlier in the same call -
                    # confirm the intended flow.
                    gHist.sameBar(
                        xSeq=xSeq,
                        arrayMIthis=arrayVal,
                        linestyleCode=self.arrLinestyleCode[desk.frame],
                        color=self.arrColor[desk.frame])

                if (desk.frame == 0) or (desk.frame == 3):
                    pictureName = 'HMI_%s%s' % (sufix, filename_correction)
                    gHist.myPlot.print_graph(desk, gHist.fig, pictureName, frame=desk.tk_root)

        if desk.saveData and hmi is not None:
            sufix = ('%s_%s_%s_%s_frame%i_NOL%i_%iL_cutoff%i') %\
                    (desk.organism, desk.minmax, desk.seqType, desk.gene_title, desk.frame,
                     desk.numOfLetters, desk.cutoffLength, desk.cutoffNumSeq)

            std_rand = "" if (desk.label_random == "") else desk.stri_random
            filename = 'HMI_summary_%s%s%s.txt' % (sufix, filename_correction, std_rand)
            stri = hmi.ent.calc_anova(listMI_Anova, sufix)
            hmi.ent.print_data_summary(desk, speciesParams, roundVal=roundVal, filename=filename, stri=stri)

    self.failure = False
    self.error_msg = 'Task ended. All right.'

    return True
def save_tables_show_graph(self, desk, which_module):
    '''
    Summarize the vertical entropy / VMI values of every species in
    self.list_species_name_vmi and, depending on the desk flags, plot them
    and save a summary table.

    desk:         desktop/parameter object. When desk.each_all is 'each' only
                  ``which_module`` and the current desk.withCorrection are
                  processed; otherwise both modules ('Entropy', 'VMI') and
                  both correction settings are processed.
                  desk.withCorrection is rebound on every pass.
    which_module: 'Entropy' selects the Shannon entropy lists of each ``vmi``
                  record, any other value selects the MI lists.

    For each species record (filename, name, species, vmi, sufix) the values
    are divided by desk.numOfLetters when desk.norm is set and scaled by 1000
    when desk.mnat is set. A species whose values never exceed zero is
    reported and skipped. Otherwise mean/median/std/max/min are stored in
    self.meanY ... self.minY, and the distribution (Entropy) or the heat map
    (VMI) is plotted when desk.showGraph or desk.saveGraph is set. After the
    species loop, when desk.saveData is set, the ANOVA text and the summary
    are written through ``vmi.ent``.

    Returns None.
    '''
    if desk.each_all == 'each':
        list_modules = [which_module]
        bias_corr_list = [desk.withCorrection]
    else:
        list_modules = ['Entropy', 'VMI']
        bias_corr_list = [False, True]

    print("-----------------------------------------------------------")
    for which_module in list_modules:
        for bias_corr in bias_corr_list:
            listMI_Anova = []
            speciesParams = []
            # stays None when there is no species, so the save step can be
            # skipped instead of failing on names that were never bound
            vmi = None

            for iLoop, record in enumerate(self.list_species_name_vmi, start=1):
                _, name, species, vmi, sufix = record

                row = desk.dicParams[species]
                if desk.minmax == 'mincut':
                    numOfSeqs = row[2]
                    L = row[5]
                else:
                    numOfSeqs = row[3]
                    L = row[6]

                print("%i) %s - %s %s %s"%(iLoop, name, desk.minmax, "bias corr." if bias_corr else 'no corr.', which_module ))

                if desk.organism == 'Drosophila':
                    sp = self.dr.mnemonic(species.replace('Drosophila ',''))
                else:
                    sp = species

                desk.withCorrection = bias_corr
                if desk.withCorrection:
                    str_correction = ' (bias corr.)'
                    filename_correction = '_bias_corr'
                else:
                    str_correction = ''
                    filename_correction = ''

                std_rand = "" if (desk.label_random == "") else " " + desk.label_random

                if which_module == 'Entropy':
                    title = 'Entropy Distribution %s %s, %s %s\n%s%s%s; %i seqs; len=%i; #letter=%i'%\
                            (desk.organism, species, desk.seqType, desk.gene_title, desk.minmax,
                             str_correction, std_rand, numOfSeqs, L, desk.numOfLetters)
                else:
                    title = 'VMI Heat Map %s %s, %s %s %s%s%s\n%i seqs; len=%i; #letter=%i'%\
                            (desk.organism, species, desk.seqType, desk.gene_title, desk.minmax,
                             str_correction, std_rand, numOfSeqs, L, desk.numOfLetters)

                # forced to float so the in-place scaling below also works
                # when the inputs are integers
                if which_module == 'Entropy':
                    if desk.withCorrection:
                        arrayVal = np.array(vmi.HShannonCorrList, dtype=float)
                        arraySE = np.array(vmi.SeHShannonCorrList, dtype=float)
                    else:
                        arrayVal = np.array(vmi.HShannonList, dtype=float)
                        arraySE = np.array(vmi.SeHShannonList, dtype=float)
                else:
                    if desk.withCorrection:
                        arrayVal = np.array(vmi.MIcorrList, dtype=float)
                        arraySE = np.array(vmi.SeMICorrList, dtype=float)
                    else:
                        arrayVal = np.array(vmi.MIlist, dtype=float)
                        arraySE = np.array(vmi.SeMIList, dtype=float)

                # normalization dividing by numOfLetters
                if desk.norm:
                    arrayVal /= desk.numOfLetters
                    arraySE /= desk.numOfLetters

                # milli nats
                if desk.mnat:
                    arrayVal *= 1000
                    arraySE *= 1000
                    roundVal = 2
                else:
                    roundVal = 4

                # position (i, j), value and SE of the first largest positive
                # value; stays None when nothing exceeds zero
                maxMI = 0
                maxiPos = None
                for pos, value in enumerate(arrayVal):
                    if value > maxMI:
                        maxMI = value
                        i, j = vmi.ijList[pos]
                        maxiPos = [i, j, maxMI, arraySE[pos]]

                if maxiPos is None:
                    stri = '### Species %s has MI = ZERO. Too conserved data sequences. Impossible to include in analysis.'%(species)
                    print(stri)
                    continue

                xSeq = list(range(len(arrayVal)))

                self.meanY = np.round(np.mean(arrayVal), roundVal)
                self.medianY = np.round(np.median(arrayVal), roundVal)
                self.stdY = np.round(np.std(arrayVal), roundVal)
                self.maxY = np.round(np.max(arrayVal), roundVal)
                self.minY = np.round(np.min(arrayVal), roundVal)

                speciesParams.append([species,sp,numOfSeqs, self.meanY, self.medianY, self.stdY, self.maxY, self.minY])

                if desk.saveData:
                    listMI_Anova.append(arrayVal)

                if desk.showGraph or desk.saveGraph:
                    if which_module == 'Entropy':
                        gHist = graphPac.Histogram_FreqDistribution(desk, title)

                        gHist.meanY = self.meanY
                        gHist.medianY = self.medianY
                        gHist.stdY = self.stdY
                        gHist.maxY = self.maxY
                        gHist.minY = self.minY

                        gHist.plot_H_MI('V', xSeq=xSeq, ySeq=arrayVal, ySE=arraySE, showError=True)
                        gHist.densityBar('V', seq=arrayVal)

                        pictureName = 'VHShannon_%s%s%s'%(sufix, filename_correction, desk.stri_random)
                        gHist.myPlot.print_graph(desk, gHist.fig, pictureName, frame=desk.tk_root)
                    else:
                        # 3D has no ceiling
                        if desk.is3D:
                            limSup = self.maxY
                        else:
                            ceil = desk.heatmap_ceil_value
                            # the same roof for all heatmaps: parametrize in future
                            if desk.heatmap_ceil and self.maxY <= ceil:
                                limSup = ceil
                            else:
                                limSup = self.maxY

                        # updated 28/09/2015
                        gMI = vmi.plotHeatMap(desk, desk.is3D, arrayVal, vmi.ijList, L, maxiPos, title,
                                              species=species, limSup=limSup, roundVal=roundVal,
                                              str_correction=str_correction)

                        if desk.is3D:
                            pictureName = 'HeatMap_3D_VMI_%s%s%s'%(sufix, filename_correction,desk.stri_random)
                        else:
                            gMI.densityHeatmapBar(arrayVal, limSup)
                            pictureName = 'HeatMap_2D_VMI_%s%s%s'%(sufix, filename_correction,desk.stri_random)

                        gMI.myPlot.print_graph(desk, gMI.fig, pictureName, frame=desk.tk_root)

            # Flavio 02/06/2015
            which_symb = 'VHShannon' if which_module == 'Entropy' else 'VMI'

            if desk.saveData and vmi is not None:
                sufix = ('%s_%s_%s_%s_NOL%i_%iL_cutoff%i') %\
                        (desk.organism, desk.minmax, desk.seqType, desk.gene_title,
                         desk.numOfLetters, desk.cutoffLength, desk.cutoffNumSeq)

                filename = '%s_summary_%s%s%s.txt' % (which_symb, sufix, filename_correction, desk.stri_random)
                stri = vmi.ent.calc_anova(listMI_Anova, sufix)
                vmi.ent.print_data_summary(desk, speciesParams, roundVal=roundVal, filename=filename, stri=stri)
def looping(self, desk, opt):
    '''
    Draw and save the hierarchical-cluster dendrogram of one JSD distance
    matrix.

    desk: desktop/parameter object. This method rebinds desk.withCorrection,
          desk.minmax, desk.colorThreshold, desk.unit, desk.factor and
          desk.roundVal before using them.
    opt:  indexable pair (withCorrection, minmax).

    The matrix is read from desk.rootTable + desk.cluster_input_filename,
    multiplied by desk.factor (1000 for mnat, 1 for nat), clustered with
    scipy's ``linkage`` and drawn with ``dendrogram``; the figure is handed
    to graphPac.Plot().print_graph.

    Returns False (and sets self.error_msg) when the distance matrix file
    cannot be opened, True otherwise.
    '''
    plt.close("all")
    plt.clf()

    desk.withCorrection = opt[0]
    desk.minmax = opt[1]

    if desk.withCorrection:
        self.str_correction = '-bias corr.'
        self.filename_correction = '_bias_corr'
    else:
        self.str_correction = ''
        self.filename_correction = ''

    print("\n--->>>", desk.minmax, self.str_correction)

    desk.colorThreshold = desk.colorThreshold_var.get()

    if desk.mnat:
        desk.unit = 'mnat'
        desk.factor = 1000
        desk.roundVal = 2
    else:
        desk.unit = 'nat'
        desk.factor = 1
        desk.roundVal = 4

    head = (desk.cluster_method_desc, desk.organism, desk.seqType, desk.gene)
    tail = (desk.numOfLetters, desk.cutoffLength, desk.cutoffNumSeq)

    if desk.vert_horiz == 'HMI':
        xLabel = 'JSD(HMI) (%s)' % (desk.unit)
        title = "Hierarchical Cluster Method=%s of JSD(HMI)- %s %s %s" % head

        if desk.frame > 0:
            # the label used to read "desk.frame %i", a leftover of a
            # search-and-replace inside the string
            title += '\nJSD(HMI) %s%s, frame %i, #letter %i, min(L)=%i, min(#seqs)=%i' % \
                     ((desk.minmax, self.str_correction, desk.frame) + tail)
        else:
            title += '\n%s%s, letter %i, min(L)=%i, min(#seqs)=%i' % \
                     ((desk.minmax, self.str_correction) + tail)
    elif desk.vert_horiz == 'VMI':
        xLabel = 'JSD(VMI) (%s)' % (desk.unit)
        # multidimensional distance
        title = "Hierarchical Cluster Method=%s of JSD(VMI), %s %s %s" % head
        title += '\n%s%s, #letter %i, min(L)=%i, min(#seqs)=%i' % \
                 ((desk.minmax, self.str_correction) + tail)
    else:
        # the data below are scaled by desk.factor for every module, so the
        # axis label follows desk.unit instead of a fixed "nat"
        xLabel = 'JSD(VSH) (%s)' % (desk.unit)
        # multidimensional distance
        title = "Hierarchical Cluster Method=%s of JSD(VSH), %s %s %s" % head
        title += '\n%s%s, #letter %i, min(L)=%i, min(#seqs)=%i' % \
                 ((desk.minmax, self.str_correction) + tail)

    desk.set_cluster_filenames()
    filename = desk.cluster_input_filename

    ret, _, colHeaders, dataMatrix = self.open_distance_matrix_file(
        desk.rootTable + filename)
    if not ret:
        self.error_msg = 'Could not find %s' % (desk.rootTable + filename)
        return False

    pictureName = 'Cluster_' + filename.replace('.txt', '')

    # desk.dr is defined in pipe_desktop get_params()
    rows = desk.dr.labels(colHeaders) if desk.dr else colHeaders

    # convert the native python array into a numpy array and rescale it
    dataMatrix = np.array(dataMatrix)
    if desk.factor != 1:
        dataMatrix = dataMatrix * desk.factor

    # methods: single, weighted, average, complete
    # NOTE(review): linkage() receives the square matrix as observation
    # vectors (euclidean metric between rows), not as a condensed distance
    # matrix - presumably intentional ("multidimensional distance"); confirm.
    linkageMatrix = linkage(dataMatrix,
                            method=desk.cluster_method,
                            metric='euclidean')

    # the hierarchical cluster distorts distances: halve the merge distances
    linkageMatrix[:, 2] = np.round(linkageMatrix[:, 2] * .5, desk.roundVal)

    fig = plt.figure(1, dpi=desk.dpi)
    # integer position: string positions such as '111' are rejected by
    # recent Matplotlib releases
    ax = fig.add_subplot(111)
    plt.subplots_adjust(bottom=.1, left=.05, right=.84)

    yLabel = 'species'
    plt.rcParams['lines.linewidth'] = 2.5
    fontsize = 26
    plt.title(title, fontsize=fontsize)
    ax.set_xlabel(xLabel, fontsize=fontsize)
    ax.set_ylabel(yLabel, fontsize=fontsize)

    # make tick labels bigger
    leaf_font_size = 28

    try:
        dendrogram(linkageMatrix,
                   color_threshold=desk.colorThreshold,
                   labels=rows,
                   orientation='right')
    except Exception:
        # best effort: an empty figure is still saved below
        print("Failed in printing dendrogram")

    plt.xticks(fontsize=leaf_font_size)
    plt.yticks(fontsize=leaf_font_size)

    # A former block annotating each node with its distance and standard
    # error (from the dendrogram return value and an SE matrix) was disabled
    # in the original and is not reproduced here.

    self.myPlot = graphPac.Plot()
    self.myPlot.print_graph(desk,
                            fig,
                            pictureName=pictureName,
                            frame=desk.tk_root,
                            stay=True)

    return True
def __init__(self, desk):
    '''
    Build one Mr.Bayes report figure per selected parameter.

    desk: desktop/parameter object. desk.get_params() is called first; the
          ``*_var`` checkboxes select which parameters are reported.

    For every selected parameter a figure is created, the value range over
    all studies (min_max-alig_cons-std_covar) is scanned, per-study summary
    statistics are stored in mbs.summary[par][study], the distributions are
    drawn through the mrBayesClass helpers and the figure is handed to
    graphPac.Plot().print_graph. When desk.saveData is set the parameters are
    saved through mbs.save_params.

    The outcome is reported through self.failure (True until everything
    finished) and self.error_msg; nothing is returned.
    '''
    self.desk = desk
    self.failure = True
    self.error_msg = ''

    try:
        desk.get_params()
    except Exception:
        self.error_msg = 'Could not get parameters.'
        return

    mbs = mb.mrBayesClass(desk)

    if not mbs.read_runPs():
        self.error_msg = 'Problems reading Mr.Bayes files'
        return

    if not mbs.stat_files_to_dic():
        self.error_msg = 'Could not read Mr.Bayes statistic files'
        return

    mbs.summary = {}

    # (desk checkbox attribute, parameter label)
    checkboxes = (
        ("piA_var", "pi(A)"),
        ("piC_var", "pi(C)"),
        ("piG_var", "pi(G)"),
        ("piT_var", "pi(T)"),
        ("rAC_var", "r(A<->C)"),
        ("rAG_var", "r(A<->G)"),
        ("rAT_var", "r(A<->T)"),
        ("rCG_var", "r(C<->G)"),
        ("rCT_var", "r(C<->T)"),
        # the closing parenthesis was missing in this label
        ("rGT_var", "r(G<->T)"),
        ("LnL_var", "LnL"),
        ("LnPr_var", "LnPr"),
        ("TL_var", "TL"),
        ("alpha_var", "alpha"),
        ("off_on_var", "s(off->on)"),
        ("on_off_var", "s(on->off)"),
        ("pinvar_var", "pinvar"),
    )
    params = [label for attr, label in checkboxes if getattr(desk, attr).get()]

    if not params:
        self.error_msg = 'Define at least one param.'
        return

    numCols = 46
    numLines = 16

    self.myPlot = graphPac.Plot()

    for iPar, par in enumerate(params, start=1):
        mbs.summary[par] = {}

        fig = plt.figure(iPar)
        plt.subplots_adjust(left=0.05, right=0.95, bottom=0.05, top=0.90)

        mng = plt.get_current_fig_manager()
        mng.window.wm_geometry("1400x900+50+50")

        seqXs = []

        # value range of this parameter over every available study
        mini, maxi = float('inf'), -float('inf')
        for min_max in mbs.min_maxs:
            for alig_cons in mbs.alig_conss:
                for std_covar in mbs.std_covars:
                    study = min_max + '-' + alig_cons + '-' + std_covar
                    try:
                        vals = np.array(mbs.dic_detaild_params[study][par])
                        lowest, highest = np.min(vals), np.max(vals)
                    except (LookupError, TypeError, ValueError):
                        # study not loaded or without usable values
                        continue

                    # both bounds are checked for every study; an elif here
                    # would skip the maximum whenever the minimum changes
                    if lowest < mini:
                        mini = lowest
                    if highest > maxi:
                        maxi = highest

        if par != "LnL" and par != "LnPr":
            if par == "alpha":
                mini = 0
            elif mini < 0:
                mini = 0

            if par in ["pi(A)", "pi(C)", "pi(G)", "pi(T)"]:
                if maxi > 1:
                    maxi = 1

        div = 5
        delta = (maxi - mini) / float(div - 1)
        ticks = [round(mini + delta * i, 2) for i in range(div)]
        ticks0 = [0] * div

        cntCols = 0
        stri_summary = " \tAligned\tConsensus\n"

        for min_max in mbs.min_maxs:
            for alig_cons in mbs.alig_conss:
                listData = []
                cntLine = 0
                # reflects the last std_covar tried; False when there is none
                didFind = False

                for std_covar in mbs.std_covars:
                    try:
                        _ = mbs.mb_filenames[min_max][alig_cons][std_covar]
                        didFind = True
                    except (LookupError, TypeError):
                        didFind = False
                        continue

                    study = min_max + '-' + alig_cons + '-' + std_covar

                    # stays empty if the study could not be read
                    summary = mbs.summary[par][study] = {}

                    try:
                        vals = np.array(mbs.dic_detaild_params[study][par])
                        N = len(vals)
                        seqXs.append(np.array(vals))

                        mu, sigma, hmu, hsigma, q025, _, q2, _, q975 = mbs.stat_ppf_decimal(
                            vals, par)

                        summary["N"] = N
                        summary["mu"] = mu
                        summary["sigma"] = sigma
                        summary["SE"] = sigma / np.sqrt(N)
                        summary["median"] = q2
                        summary["q025"] = q025
                        summary["q975"] = q975

                        if hmu:
                            summary["hmu"] = hmu
                            summary["hsigma"] = hsigma
                            summary["hSE"] = hsigma / np.sqrt(N)

                        summary["min"] = np.min(vals)
                        summary["max"] = np.max(vals)

                        # LnL doesn't have ESS and PSRF - adopted from TL,
                        # the total tree length (sum of all branch lengths)
                        stat_key = "TL" if par == "LnL" else par
                        try:
                            summary["avgESS"] = mbs.dic_pstat[study][stat_key]['avgESS']
                        except (LookupError, TypeError):
                            summary["avgESS"] = 0

                        try:
                            summary["PSRF"] = mbs.dic_pstat[study][stat_key]['PSRF']
                        except (LookupError, TypeError):
                            summary["PSRF"] = 10000

                        if summary["avgESS"] < 90:
                            sAvgESS = "avgESS=%3.1f **" % summary["avgESS"]
                        else:
                            sAvgESS = "avgESS=%3.1f" % summary["avgESS"]

                        if (summary["PSRF"] - 1) > .1:
                            sPSRF = " PSRF=%1.2f ***" % summary["PSRF"]
                        else:
                            sPSRF = " PSRF=%1.2f" % summary["PSRF"]

                        # first study on the top row, the next one below it
                        firstRow = 0 if cntLine == 0 else 6
                        ax = plt.subplot2grid((numLines, numCols),
                                              (firstRow, cntCols),
                                              rowspan=4,
                                              colspan=10)

                        if par == "LnL":
                            print("%s %s %s %s %s %s" % (par, study, mu, sigma, hmu, hsigma))
                            stri_summary = mbs.show_lnl(
                                desk, plt, ax, study, sAvgESS, sPSRF,
                                cntLine, cntCols, min_max, alig_cons,
                                stri_summary)
                        else:
                            print("%s %s %s %s" % (par, study, mu, sigma))
                            mbs.show_distrib(plt, ax, par, study, sAvgESS,
                                             sPSRF, ticks, desk.colors,
                                             iPar, mini, maxi, cntCols)

                        listData.append(vals)
                        cntLine += 1
                    except Exception as err:
                        # best effort: report the study and go on with the
                        # remaining ones instead of hiding the failure
                        print("Warning: could not process %s for %s: %s" % (study, par, err))

                if didFind and len(listData) == 2:
                    if par == "LnL":
                        mbs.show_LRT(alig_cons, par, plt, numLines,
                                     numCols, cntCols)
                    else:
                        mbs.show_conf_interval(min_max, alig_cons, par,
                                               plt, numLines, numCols,
                                               cntCols, ticks, ticks0,
                                               mini, maxi, listData)
                        mbs.show_qqplot2(min_max, alig_cons, par, plt,
                                         numLines, numCols, cntCols,
                                         listData)

                cntCols += 12

        if par == "LnL":
            stri = ""
        else:
            f_value, p_value = mbs.calc_anova(seqXs)

            if p_value <= 0.05:
                stri = ', at least one distribution is statistically different.'
            else:
                stri = ', the distributions are statistically similar.'

            stri += ' ANOVA: f-value %2.3e p_value %2.3e' % (f_value, p_value)

        left = .05
        top = .97

        fig.text(left, top, par + stri, color="red")

        stri = mbs.ttest_summary(par)
        print(stri)

        sPar = par.replace(">", "").replace("<", "")
        pictureName = "%s_%s_mr_bayes_analisis_param_%s" % (
            desk.organism, desk.gene_title, sPar)
        self.myPlot.print_graph(self.desk,
                                fig,
                                pictureName,
                                frame=self.desk.tk_root,
                                stay=False)

        print(stri_summary)

    if desk.saveData:
        mbs.save_params(params)

    self.failure = False
    return