예제 #1
0
    def looping(self, desk, opt):
        """Cluster the stored distance matrix and draw it as a dendrogram.

        The method configures ``desk`` from ``opt``, builds the plot title and
        axis label for the selected distance flavour (``desk.vert_horiz``),
        loads the distance matrix named by ``desk.cluster_input_filename``,
        runs ``linkage`` on it and hands the resulting figure to
        ``graphPac.Plot().print_graph``.

        Parameters
        ----------
        desk :
            Project settings/GUI object.  This method writes
            ``withCorrection``, ``minmax``, ``colorThreshold``, ``unit``,
            ``factor`` and ``roundVal`` on it and reads many other attributes
            (``mnat``, ``vert_horiz``, ``cluster_method``, ``dpi``, ...).
        opt :
            Indexable pair: ``opt[0]`` becomes ``desk.withCorrection`` and
            ``opt[1]`` becomes ``desk.minmax``.

        Returns
        -------
        bool
            ``False`` (with ``self.error_msg`` set) when the distance-matrix
            file could not be opened, ``True`` otherwise.
        """
        plt.close("all")
        plt.clf()

        desk.withCorrection = opt[0]
        desk.minmax = opt[1]

        if desk.withCorrection:
            self.str_correction = '-bias corr.'
            self.filename_correction = '_bias_corr'
        else:
            self.str_correction = ''
            self.filename_correction = ''

        print("\n--->>>", desk.minmax, self.str_correction)

        desk.colorThreshold = desk.colorThreshold_var.get()

        # Unit selection also fixes the scale factor applied to the matrix
        # and the number of decimals kept for the merge heights.
        if desk.mnat:
            desk.unit = 'mnat'
            desk.factor = 1000
            desk.roundVal = 2
        else:
            desk.unit = 'nat'
            desk.factor = 1
            desk.roundVal = 4

        if desk.vert_horiz == 'HMI':
            xLabel = 'JSD(HMI) (%s)' % (desk.unit)

            title = "Hierarchical Cluster Method=%s of JSD(HMI)- %s %s %s" \
                % (desk.cluster_method_desc, desk.organism, desk.seqType, desk.gene)

            if desk.frame > 0:
                title += '\nJSD(HMI) %s%s, desk.frame %i, #letter %i, min(L)=%i, min(#seqs)=%i' % \
                    (desk.minmax, self.str_correction, desk.frame, desk.numOfLetters, desk.cutoffLength, desk.cutoffNumSeq)
            else:
                title += '\n%s%s, letter %i, min(L)=%i, min(#seqs)=%i' % \
                    (desk.minmax, self.str_correction, desk.numOfLetters, desk.cutoffLength, desk.cutoffNumSeq)
        elif desk.vert_horiz == 'VMI':
            # multidimensional distance
            xLabel = 'JSD(VMI) (%s)' % (desk.unit)

            title = "Hierarchical Cluster Method=%s of JSD(VMI), %s %s %s" \
                % (desk.cluster_method_desc, desk.organism, desk.seqType, desk.gene)

            title += '\n%s%s, #letter %i, min(L)=%i, min(#seqs)=%i' % \
                (desk.minmax, self.str_correction, desk.numOfLetters, desk.cutoffLength, desk.cutoffNumSeq)
        else:
            # multidimensional distance
            # NOTE(review): the unit is hard-coded to "nat" here although the
            # matrix is still multiplied by desk.factor below - confirm.
            xLabel = 'JSD(VSH) (nat)'

            title = "Hierarchical Cluster Method=%s of JSD(VSH), %s %s %s" \
                % (desk.cluster_method_desc, desk.organism, desk.seqType, desk.gene)

            title += '\n%s%s, #letter %i, min(L)=%i, min(#seqs)=%i' % \
                (desk.minmax, self.str_correction, desk.numOfLetters, desk.cutoffLength, desk.cutoffNumSeq)

        desk.set_cluster_filenames()
        filename = desk.cluster_input_filename

        ret, _, colHeaders, dataMatrix = self.open_distance_matrix_file(
            desk.rootTable + filename)
        if not ret:
            self.error_msg = 'Could not find %s' % (desk.rootTable + filename)
            return False

        pictureName = 'Cluster_' + filename.replace('.txt', '')

        # desk.dr is defined in pipe_desktop get_params(); when present it
        # translates the column headers into display labels.
        if desk.dr:
            rows = desk.dr.labels(colHeaders)
        else:
            rows = colHeaders

        # Convert the native Python table into a numpy array and rescale it
        # to the selected unit in one vectorised step.
        dataMatrix = np.array(dataMatrix)
        if desk.factor != 1:
            dataMatrix = dataMatrix * desk.factor

        # Available methods: single, weighted, average, complete.
        # NOTE(review): the matrix is passed to linkage() as-is; if it is a
        # square distance table, linkage() treats each row as an observation
        # vector rather than as precomputed distances - confirm this is wanted.
        linkageMatrix = linkage(dataMatrix,
                                method=desk.cluster_method,
                                metric='euclidean')

        # The original author notes that the hierarchical clustering distorts
        # the distances; merge heights (column 2) are halved and rounded.
        for row in linkageMatrix:
            row[2] = round(row[2] * .5, desk.roundVal)

        fig = plt.figure(1, dpi=desk.dpi)
        # Integer position code: the string form '111' is rejected by newer
        # matplotlib releases.
        ax = fig.add_subplot(111)

        plt.subplots_adjust(bottom=.1, left=.05, right=.84)

        yLabel = 'species'

        plt.rcParams['lines.linewidth'] = 2.5
        fontsize = 26

        plt.title(title, fontsize=fontsize)
        ax.set_xlabel(xLabel, fontsize=fontsize)
        ax.set_ylabel(yLabel, fontsize=fontsize)

        # Tick labels (leaf names and distances) are drawn larger.
        leaf_font_size = 28

        try:
            dendrogram(
                linkageMatrix,
                color_threshold=desk.colorThreshold,
                labels=rows,
                orientation='right'
            )
        except Exception as err:
            # Best effort: still show the (empty) figure, but report why the
            # dendrogram could not be drawn instead of hiding the cause.
            print("Failed in printing dendrogram: %s" % err)

        plt.xticks(fontsize=leaf_font_size)
        plt.yticks(fontsize=leaf_font_size)

        self.myPlot = graphPac.Plot()
        self.myPlot.print_graph(desk,
                                fig,
                                pictureName=pictureName,
                                frame=desk.tk_root,
                                stay=True)

        return True
예제 #2
0
    def __init__(self, desk):
        """Plot and summarise the selected Mr.Bayes parameters.

        For every parameter ticked on ``desk`` one figure is created.  Each
        (min_max, alig_cons) pair occupies a group of grid columns in which
        the distributions of the available ``std_covar`` studies are drawn,
        followed by a confidence-interval / LRT panel and a QQ plot when
        exactly two studies were plotted.  Per-study statistics are stored in
        ``mbs.summary[par][study]``.

        Parameters
        ----------
        desk :
            Project settings/GUI object providing ``get_params()``, the
            ``*_var`` check-box variables, ``organism``, ``gene_title``,
            ``colors``, ``tk_root`` and ``saveData``.

        Notes
        -----
        ``self.failure`` stays ``True`` and ``self.error_msg`` describes the
        problem when the method returns early; ``self.failure`` is set to
        ``False`` only after every figure has been produced.
        """
        self.desk = desk
        self.failure = True
        self.error_msg = ''

        try:
            desk.get_params()
        except Exception:
            self.error_msg = 'Could not get parameters.'
            return

        mbs = mb.mrBayesClass(desk)

        if not mbs.read_runPs():
            self.error_msg = 'Problems reading Mr.Bayes files'
            return

        if not mbs.stat_files_to_dic():
            self.error_msg = 'Could not read Mr.Bayes statistic files'
            return

        mbs.summary = {}

        # (check-box variable, parameter name) in display order.
        # "r(G<->T)" previously lacked its closing parenthesis, so it never
        # matched the parameter keys and was silently skipped.
        selectable = (
            (desk.piA_var, "pi(A)"),
            (desk.piC_var, "pi(C)"),
            (desk.piG_var, "pi(G)"),
            (desk.piT_var, "pi(T)"),
            (desk.rAC_var, "r(A<->C)"),
            (desk.rAG_var, "r(A<->G)"),
            (desk.rAT_var, "r(A<->T)"),
            (desk.rCG_var, "r(C<->G)"),
            (desk.rCT_var, "r(C<->T)"),
            (desk.rGT_var, "r(G<->T)"),
            (desk.LnL_var, "LnL"),
            (desk.LnPr_var, "LnPr"),
            (desk.TL_var, "TL"),
            (desk.alpha_var, "alpha"),
            (desk.off_on_var, "s(off->on)"),
            (desk.on_off_var, "s(on->off)"),
            (desk.pinvar_var, "pinvar"),
        )
        params = [name for var, name in selectable if var.get()]

        if not params:
            self.error_msg = 'Define at least one param.'
            return

        # Layout grid shared by every figure.
        numCols = 46
        numLines = 16
        self.myPlot = graphPac.Plot()

        for iPar, par in enumerate(params, 1):
            mbs.summary[par] = {}

            fig = plt.figure(iPar)
            plt.subplots_adjust(left=0.05, right=0.95, bottom=0.05, top=0.90)

            # NOTE(review): window.wm_geometry looks Tk-specific - confirm
            # the backend before reusing this elsewhere.
            mng = plt.get_current_fig_manager()
            mng.window.wm_geometry("1400x900+50+50")

            seqXs = []
            mini, maxi = float('inf'), -float('inf')

            # First pass: overall value range of this parameter across all
            # studies, used for the shared axis ticks.
            for min_max in mbs.min_maxs:
                for alig_cons in mbs.alig_conss:
                    for std_covar in mbs.std_covars:

                        study = min_max + '-' + alig_cons + '-' + std_covar

                        try:
                            vals = np.array(mbs.dic_detaild_params[study][par])
                            lowest = np.min(vals)
                            highest = np.max(vals)

                            # Two independent checks: with the former
                            # if/elif the maximum was skipped whenever the
                            # minimum was updated (always on the first study).
                            if lowest < mini:
                                mini = lowest
                            if highest > maxi:
                                maxi = highest
                        except Exception:
                            # Study or parameter not available: ignore it.
                            pass

            # Likelihood values are negative; everything else starts at 0
            # (alpha unconditionally).
            if par not in ("LnL", "LnPr"):
                if par == "alpha" or mini < 0:
                    mini = 0

            # Base frequencies are capped at 1.
            if par in ("pi(A)", "pi(C)", "pi(G)", "pi(T)") and maxi > 1:
                maxi = 1

            div = 5
            delta = (maxi - mini) / float(div - 1)
            ticks = [round(mini + delta * i, 2) for i in range(div)]
            ticks0 = [0] * div

            cntCols = 0
            stri_summary = "   \tAligned\tConsensus\n"
            for min_max in mbs.min_maxs:
                for alig_cons in mbs.alig_conss:

                    listData = []
                    cntLine = 0
                    # Defined up front so an empty mbs.std_covars cannot leave
                    # it unbound or carrying the previous group's value.
                    # NOTE(review): it still reflects only the LAST std_covar
                    # of the group, as before - confirm that is intended.
                    didFind = False

                    for std_covar in mbs.std_covars:
                        try:
                            _ = mbs.mb_filenames[min_max][alig_cons][std_covar]
                            didFind = True
                        except Exception:
                            didFind = False
                            continue

                        study = min_max + '-' + alig_cons + '-' + std_covar

                        # Same dict object as mbs.summary[par][study]; it
                        # stays empty if the values cannot be read.
                        entry = {}
                        mbs.summary[par][study] = entry

                        try:
                            vals = np.array(mbs.dic_detaild_params[study][par])

                            N = len(vals)
                            seqXs.append(np.array(vals))

                            mu, sigma, hmu, hsigma, q025, _, q2, _, q975 = mbs.stat_ppf_decimal(
                                vals, par)

                            entry["N"] = N
                            entry["mu"] = mu
                            entry["sigma"] = sigma
                            entry["SE"] = sigma / np.sqrt(N)
                            entry["median"] = q2
                            entry["q025"] = q025
                            entry["q975"] = q975

                            if hmu:
                                entry["hmu"] = hmu
                                entry["hsigma"] = hsigma
                                entry["hSE"] = hsigma / np.sqrt(N)

                            entry["min"] = np.min(vals)
                            entry["max"] = np.max(vals)

                            # LnL doesn't have ESS and PSRF - adopted from TL,
                            # the total tree length (sum of all branch lengths).
                            stat_key = "TL" if par == "LnL" else par

                            try:
                                entry["avgESS"] = mbs.dic_pstat[study][stat_key]['avgESS']
                            except Exception:
                                entry["avgESS"] = 0

                            try:
                                entry["PSRF"] = mbs.dic_pstat[study][stat_key]['PSRF']
                            except Exception:
                                entry["PSRF"] = 10000

                            # Asterisks flag a low ESS / a PSRF far from 1.
                            sAvgESS = "avgESS=%3.1f" % entry["avgESS"]
                            if entry["avgESS"] < 90:
                                sAvgESS += " **"

                            sPSRF = " PSRF=%1.2f" % entry["PSRF"]
                            if (entry["PSRF"] - 1) > .1:
                                sPSRF += " ***"

                            # First study of the group goes on the top row,
                            # later ones on the row starting at line 6.
                            gridRow = 0 if cntLine == 0 else 6
                            ax = plt.subplot2grid((numLines, numCols),
                                                  (gridRow, cntCols),
                                                  rowspan=4,
                                                  colspan=10)

                            if par == "LnL":
                                print("%s %s %s %s %s %s" %
                                      (par, study, mu, sigma, hmu, hsigma))
                                stri_summary = mbs.show_lnl(
                                    desk, plt, ax, study, sAvgESS, sPSRF,
                                    cntLine, cntCols, min_max, alig_cons,
                                    stri_summary)
                            else:
                                print("%s %s %s %s" % (par, study, mu, sigma))
                                mbs.show_distrib(plt, ax, par, study, sAvgESS,
                                                 sPSRF, ticks, desk.colors,
                                                 iPar, mini, maxi, cntCols)

                            listData.append(vals)
                            cntLine += 1
                        except Exception:
                            # Best effort: a study that cannot be read or
                            # plotted is skipped, the others are still shown.
                            pass

                    if didFind:
                        # Pairwise panels need exactly two plotted studies.
                        if len(listData) == 2:
                            if par == "LnL":
                                mbs.show_LRT(alig_cons, par, plt, numLines,
                                             numCols, cntCols)
                            else:
                                mbs.show_conf_interval(min_max, alig_cons, par,
                                                       plt, numLines, numCols,
                                                       cntCols, ticks, ticks0,
                                                       mini, maxi, listData)

                            mbs.show_qqplot2(min_max, alig_cons, par, plt,
                                             numLines, numCols, cntCols,
                                             listData)

                        cntCols += 12

            if par == "LnL":
                stri = ""
            else:
                f_value, p_value = mbs.calc_anova(seqXs)
                if p_value <= 0.05:
                    stri = ', at least one distribution is statistically different.'
                else:
                    stri = ', the distributions are statistically similar.'

                stri += 'ANOVA: f-value %2.3e   p_value %2.3e' % (f_value,
                                                                  p_value)

            left = .05
            top = .97
            fig.text(left, top, par + stri, color="red")

            stri = mbs.ttest_summary(par)
            print(stri)

            # "<" and ">" are stripped before the name is used in a file name.
            sPar = par.replace(">", "").replace("<", "")
            pictureName = "%s_%s_mr_bayes_analisis_param_%s" % (
                desk.organism, desk.gene_title, sPar)
            self.myPlot.print_graph(self.desk,
                                    fig,
                                    pictureName,
                                    frame=self.desk.tk_root,
                                    stay=False)

        # Only the summary built for the last parameter is still bound here.
        print(stri_summary)

        if desk.saveData:
            mbs.save_params(params)

        self.failure = False
        return