def looping(self, desk, opt):
    """Plot a hierarchical-cluster dendrogram of a JSD distance matrix.

    The method configures ``desk`` from ``opt``, builds the axis label and
    title for the selected mode (``desk.vert_horiz`` is 'HMI', 'VMI' or
    anything else, treated as VSH), loads the distance matrix named by
    ``desk.cluster_input_filename``, clusters it and hands the figure to
    ``graphPac.Plot().print_graph``.

    Args:
        desk: project desktop/configuration object. Attributes read and
            written here include ``withCorrection``, ``minmax``, ``unit``,
            ``factor``, ``roundVal`` and ``colorThreshold``.
        opt: two-item sequence ``(with_correction, minmax)``.

    Returns:
        True when the figure was produced; False when the distance matrix
        file could not be opened (``self.error_msg`` is set in that case).
    """
    plt.close("all")
    # NOTE(review): after close("all"), clf() makes pyplot create a fresh
    # current figure; the later plt.figure(1, dpi=...) may then return that
    # existing figure and ignore ``dpi`` -- confirm against the matplotlib
    # version in use before changing the order.
    plt.clf()

    desk.withCorrection = opt[0]
    desk.minmax = opt[1]

    if desk.withCorrection:
        self.str_correction = '-bias corr.'
        self.filename_correction = '_bias_corr'
    else:
        self.str_correction = ''
        self.filename_correction = ''

    print("\n--->>>", desk.minmax, self.str_correction)

    desk.colorThreshold = desk.colorThreshold_var.get()

    # Display unit: mnat scales every distance by 1000 and needs fewer decimals.
    if desk.mnat:
        desk.unit = 'mnat'
        desk.factor = 1000
        desk.roundVal = 2
    else:
        desk.unit = 'nat'
        desk.factor = 1
        desk.roundVal = 4

    subject = (desk.cluster_method_desc, desk.organism, desk.seqType,
               desk.gene)
    cutoffs = (desk.numOfLetters, desk.cutoffLength, desk.cutoffNumSeq)

    if desk.vert_horiz == 'HMI':
        xLabel = 'JSD(HMI) (%s)' % (desk.unit)
        title = "Hierarchical Cluster Method=%s of JSD(HMI)- %s %s %s" \
            % subject

        if desk.frame > 0:
            # The original text read "desk.frame %i", an apparent
            # search/replace artifact; the label is simply "frame".
            title += ('\nJSD(HMI) %s%s, frame %i, #letter %i, min(L)=%i, '
                      'min(#seqs)=%i'
                      % ((desk.minmax, self.str_correction, desk.frame)
                         + cutoffs))
        else:
            title += ('\n%s%s, letter %i, min(L)=%i, min(#seqs)=%i'
                      % ((desk.minmax, self.str_correction) + cutoffs))

    elif desk.vert_horiz == 'VMI':
        # multidimensional distance
        xLabel = 'JSD(VMI) (%s)' % (desk.unit)
        title = "Hierarchical Cluster Method=%s of JSD(VMI), %s %s %s" \
            % subject
        title += ('\n%s%s, #letter %i, min(L)=%i, min(#seqs)=%i'
                  % ((desk.minmax, self.str_correction) + cutoffs))

    else:
        # multidimensional distance
        # NOTE(review): the label is hard-coded to "nat" although the data
        # are still scaled by desk.factor below -- confirm this is intended.
        xLabel = 'JSD(VSH) (nat)'
        title = "Hierarchical Cluster Method=%s of JSD(VSH), %s %s %s" \
            % subject
        title += ('\n%s%s, #letter %i, min(L)=%i, min(#seqs)=%i'
                  % ((desk.minmax, self.str_correction) + cutoffs))

    desk.set_cluster_filenames()

    filename = desk.cluster_input_filename
    ret, _, colHeaders, dataMatrix = self.open_distance_matrix_file(
        desk.rootTable + filename)
    if not ret:
        self.error_msg = 'Could not find %s' % (desk.rootTable + filename)
        return False

    pictureName = 'Cluster_' + filename.replace('.txt', '')

    # desk.dr is defined in pipe_desktop get_params()
    if desk.dr:
        rows = desk.dr.labels(colHeaders)
    else:
        rows = colHeaders

    # Convert the native Python table into a numpy array and rescale it to
    # the display unit in one vectorised step.
    dataMatrix = np.array(dataMatrix)
    if desk.factor != 1:
        dataMatrix = dataMatrix * desk.factor

    # Methods: single, weighted, average, complete.
    # NOTE(review): the matrix rows are passed to linkage() as observation
    # vectors with a euclidean metric, not as a condensed distance matrix.
    linkageMatrix = linkage(dataMatrix, method=desk.cluster_method,
                            metric='euclidean')

    # The hierarchical clustering distorts the distances; the merge heights
    # (column 2) are halved and rounded for display.
    linkageMatrix[:, 2] = np.round(linkageMatrix[:, 2] * .5, desk.roundVal)

    fig = plt.figure(1, dpi=desk.dpi)
    # Integer position spec: string specs such as '111' are deprecated.
    ax = fig.add_subplot(111)

    plt.subplots_adjust(bottom=.1, left=.05, right=.84)

    yLabel = 'species'
    plt.rcParams['lines.linewidth'] = 2.5

    fontsize = 26
    plt.title(title, fontsize=fontsize)
    ax.set_xlabel(xLabel, fontsize=fontsize)
    ax.set_ylabel(yLabel, fontsize=fontsize)

    # make tick labels bigger
    leaf_font_size = 28

    try:
        dendrogram(
            linkageMatrix,
            color_threshold=desk.colorThreshold,
            labels=rows,
            orientation='right'
        )  # show_leaf_counts=True , leaf_font_size=leaf_font_size
    except Exception as err:
        # Best effort: the (empty) figure is still handed to print_graph,
        # but the reason for the failure is no longer hidden.
        print("Failed in printing dendrogram: %s" % err)

    plt.xticks(fontsize=leaf_font_size)
    plt.yticks(fontsize=leaf_font_size)

    # A disabled block that annotated every node with a distance and a
    # standard error used to live here; it referenced `ddata` and
    # `seMatrix`, neither of which this method defines, and was removed.

    self.myPlot = graphPac.Plot()
    self.myPlot.print_graph(desk, fig, pictureName=pictureName,
                            frame=desk.tk_root, stay=True)

    return True
def __init__(self, desk):
    """Summarise and plot the Mr.Bayes parameters selected on ``desk``.

    For every selected parameter one figure is built. Each study
    ("<min_max>-<alig_cons>-<std_covar>") contributes a distribution panel
    and a statistics entry in ``mbs.summary[par][study]``; each group of two
    studies additionally gets a comparison panel (LRT for "LnL", confidence
    interval plus QQ-plot otherwise).

    Args:
        desk: project desktop/configuration object providing the
            ``*_var`` selectors, ``organism``, ``gene_title``, ``colors``,
            ``saveData`` and ``tk_root``.

    Outcome is reported through attributes rather than exceptions:
    ``self.failure`` stays True and ``self.error_msg`` is filled when the
    set-up fails; ``self.failure`` becomes False once all figures are done.
    """
    self.desk = desk
    self.failure = True
    self.error_msg = ''

    try:
        desk.get_params()
    except Exception:
        self.error_msg = 'Could not get parameters.'
        return

    mbs = mb.mrBayesClass(desk)

    if not mbs.read_runPs():
        self.error_msg = 'Problems reading Mr.Bayes files'
        return

    if not mbs.stat_files_to_dic():
        self.error_msg = 'Could not read Mr.Bayes statistic files'
        return

    mbs.summary = {}

    # (selector attribute on desk, parameter label), in display order.
    selectable = (
        ("piA_var", "pi(A)"),
        ("piC_var", "pi(C)"),
        ("piG_var", "pi(G)"),
        ("piT_var", "pi(T)"),
        ("rAC_var", "r(A<->C)"),
        ("rAG_var", "r(A<->G)"),
        ("rAT_var", "r(A<->T)"),
        ("rCG_var", "r(C<->G)"),
        ("rCT_var", "r(C<->T)"),
        # The closing parenthesis was missing ("r(G<->T"), so this label
        # could never match the parameter's key.
        ("rGT_var", "r(G<->T)"),
        ("LnL_var", "LnL"),
        ("LnPr_var", "LnPr"),
        ("TL_var", "TL"),
        ("alpha_var", "alpha"),
        ("off_on_var", "s(off->on)"),
        ("on_off_var", "s(on->off)"),
        ("pinvar_var", "pinvar"),
    )
    params = [label for attr, label in selectable
              if getattr(desk, attr).get()]

    if not params:
        self.error_msg = 'Define at least one param.'
        return

    def pstat_value(study, par, field, default):
        # LnL doesn't have ESS and PSRF - adopted from TL, the total tree
        # length (the sum of all branch lengths).
        key = "TL" if par == "LnL" else par
        try:
            return mbs.dic_pstat[study][key][field]
        except (KeyError, TypeError, AttributeError):
            return default

    numCols = 46
    numLines = 16
    self.myPlot = graphPac.Plot()

    for iPar, par in enumerate(params, 1):
        mbs.summary[par] = {}

        fig = plt.figure(iPar)
        plt.subplots_adjust(left=0.05, right=0.95, bottom=0.05, top=0.90)

        # NOTE(review): wm_geometry is a Tk window call, so this presumably
        # relies on a Tk-based matplotlib backend -- confirm.
        mng = plt.get_current_fig_manager()
        mng.window.wm_geometry("1400x900+50+50")

        seqXs = []

        # Overall value range of this parameter across all studies.
        mini, maxi = float('inf'), -float('inf')
        for min_max in mbs.min_maxs:
            for alig_cons in mbs.alig_conss:
                for std_covar in mbs.std_covars:
                    study = min_max + '-' + alig_cons + '-' + std_covar
                    try:
                        vals = np.array(mbs.dic_detaild_params[study][par])
                        lowest, highest = np.min(vals), np.max(vals)
                        # Two independent tests: the former `elif` skipped
                        # the maximum whenever the minimum was lowered.
                        if lowest < mini:
                            mini = lowest
                        if highest > maxi:
                            maxi = highest
                    except (KeyError, TypeError, ValueError):
                        # Study or parameter not loaded, or no usable data.
                        continue

        # Clamp the plotted range; the likelihoods keep their raw range.
        if par not in ("LnL", "LnPr"):
            if par == "alpha" or mini < 0:
                mini = 0
            if par in ("pi(A)", "pi(C)", "pi(G)", "pi(T)") and maxi > 1:
                maxi = 1

        div = 5
        delta = (maxi - mini) / float(div - 1)
        ticks = [round(mini + delta * i, 2) for i in range(div)]
        ticks0 = [0] * div

        cntCols = 0
        stri_summary = " \tAligned\tConsensus\n"

        for min_max in mbs.min_maxs:
            for alig_cons in mbs.alig_conss:
                listData = []
                cntLine = 0
                # Defined up front so an empty mbs.std_covars cannot leave
                # the flag unbound; otherwise it reflects the last study.
                didFind = False

                for std_covar in mbs.std_covars:
                    try:
                        _ = mbs.mb_filenames[min_max][alig_cons][std_covar]
                    except (KeyError, IndexError, TypeError):
                        didFind = False
                        continue
                    didFind = True

                    study = min_max + '-' + alig_cons + '-' + std_covar
                    stats = mbs.summary[par][study] = {}

                    try:
                        vals = np.array(mbs.dic_detaild_params[study][par])
                    except (KeyError, TypeError):
                        # If the file was not found or could not be read,
                        # the study is not in dic_detaild_params.
                        continue

                    try:
                        N = len(vals)
                        seqXs.append(np.array(vals))

                        (mu, sigma, hmu, hsigma,
                         q025, _, q2, _, q975) = mbs.stat_ppf_decimal(
                             vals, par)

                        stats["N"] = N
                        stats["mu"] = mu
                        stats["sigma"] = sigma
                        stats["SE"] = sigma / np.sqrt(N)
                        stats["median"] = q2
                        stats["q025"] = q025
                        stats["q975"] = q975

                        if hmu:
                            stats["hmu"] = hmu
                            stats["hsigma"] = hsigma
                            stats["hSE"] = hsigma / np.sqrt(N)

                        stats["min"] = np.min(vals)
                        stats["max"] = np.max(vals)

                        # Missing diagnostics fall back to values that
                        # trigger the warning markers below.
                        stats["avgESS"] = pstat_value(study, par,
                                                      'avgESS', 0)
                        stats["PSRF"] = pstat_value(study, par,
                                                    'PSRF', 10000)

                        if stats["avgESS"] < 90:
                            sAvgESS = "avgESS=%3.1f **" % stats["avgESS"]
                        else:
                            sAvgESS = "avgESS=%3.1f" % stats["avgESS"]

                        if (stats["PSRF"] - 1) > .1:
                            sPSRF = "  PSRF=%1.2f ***" % stats["PSRF"]
                        else:
                            sPSRF = "  PSRF=%1.2f" % stats["PSRF"]

                        # First study of the group goes on the top row,
                        # the following one(s) on the row below.
                        first_row = 0 if cntLine == 0 else 6
                        ax = plt.subplot2grid((numLines, numCols),
                                              (first_row, cntCols),
                                              rowspan=4, colspan=10)

                        # Single-argument print() calls behave the same
                        # with or without the Python 3 print function.
                        if par == "LnL":
                            print("%s %s %s %s %s %s"
                                  % (par, study, mu, sigma, hmu, hsigma))
                            stri_summary = mbs.show_lnl(
                                desk, plt, ax, study, sAvgESS, sPSRF,
                                cntLine, cntCols, min_max, alig_cons,
                                stri_summary)
                        else:
                            print("%s %s %s %s" % (par, study, mu, sigma))
                            mbs.show_distrib(plt, ax, par, study, sAvgESS,
                                             sPSRF, ticks, desk.colors,
                                             iPar, mini, maxi, cntCols)

                        listData.append(vals)
                        cntLine += 1
                    except Exception as err:
                        # Best effort: one broken study must not abort the
                        # whole figure, but the cause is reported.
                        print("Could not process %s for %s: %s"
                              % (par, study, err))

                if didFind and len(listData) == 2:
                    if par == "LnL":
                        mbs.show_LRT(alig_cons, par, plt, numLines,
                                     numCols, cntCols)
                    else:
                        mbs.show_conf_interval(min_max, alig_cons, par,
                                               plt, numLines, numCols,
                                               cntCols, ticks, ticks0,
                                               mini, maxi, listData)
                        mbs.show_qqplot2(min_max, alig_cons, par, plt,
                                         numLines, numCols, cntCols,
                                         listData)

                # NOTE(review): indentation was lost in the source; the
                # advance is applied for every group, found or not, so the
                # column grid stays fixed -- confirm.
                cntCols += 12

        if par == "LnL":
            stri = ""
        else:
            f_value, p_value = mbs.calc_anova(seqXs)

            if p_value <= 0.05:
                stri = ', at least one distribution is statistically different.'
            else:
                stri = ', the distributions are statistically similar.'

            # NOTE(review): indentation was lost in the source; the ANOVA
            # suffix is assumed to apply to both outcomes -- confirm.
            stri += 'ANOVA: f-value %2.3e  p_value %2.3e' % (f_value,
                                                              p_value)

        left = .05
        top = .97
        fig.text(left, top, par + stri, color="red")

        stri = mbs.ttest_summary(par)
        print(stri)

        # '<' and '>' are dropped from the parameter name before it goes
        # into the picture name.
        sPar = par.replace(">", "").replace("<", "")
        pictureName = "%s_%s_mr_bayes_analisis_param_%s" % (
            desk.organism, desk.gene_title, sPar)

        self.myPlot.print_graph(self.desk, fig, pictureName,
                                frame=self.desk.tk_root, stay=False)

    # NOTE(review): indentation was lost in the source; this summary is
    # assumed to be printed once, after the last parameter -- confirm.
    print(stri_summary)

    if desk.saveData:
        mbs.save_params(params)

    self.failure = False