Python PSPFunc Examples

Programming Language: Python

Class/Type: PSPFunc

Examples at hotexamples.com: 5

Python PSPFunc - 5 examples found. These are the top rated real world Python examples of PSPFunc extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

LRT(3)

nbNode(2)

getParams(1)

pspFileCreation(1)

supBoot(1)

Example #1

Show file

File: SiteAnalysis.py Project: KTilton/DGINN

def bppSite(bppFile, bppMixed, alnFile, alnFormat, treeFile, lModels, outDir,
            baseName, logger):
    """
    Run the Bio++ site analysis: optimize every requested model with bppml,
    compare nested models with likelihood-ratio tests, then run
    bppmixedlikelihoods on the optimized models.

    @param bppFile: parameter file passed to bppml ("param=")
    @param bppMixed: parameter file passed to bppmixedlikelihoods ("param=")
    @param alnFile: alignment file (INPUTFILE)
    @param alnFormat: format of the alignment file (FORMAT)
    @param treeFile: tree file for the analyzed alignment (TREEFILE)
    @param lModels: list of model names; names starting with "M" become
                    YNGP_* models, names starting with "DFP07" become DFP07
    @param outDir: output directory, expected to end with "/"
    @param baseName: prefix of every output file name
    @param logger: logging object
    """
    # outDir=os.getcwd()+"/"  # used to debug
    logger.info(os.getcwd())
    ### SITE ANALYSIS: BIO++
    logger.info("Bio++ Site Analysis")
    logger.info("Models to be run: {:s}".format(", ".join(lModels)))
    logger.info("Bppml parameter file: {:s}".format(bppFile))

    nodes = PSPFunc.nbNode(treeFile, logger)

    ## Bppml
    # Variables handed to bppml:
    #   INPUTFILE - alignment file
    #   FORMAT    - format of the alignment file
    #   TREEFILE  - tree file for the analyzed alignment
    #   MODEL     - model to run (YNGP_M0 through M8, or DFP07)
    #   NODES     - value returned by PSPFunc.nbNode for the tree file
    #   IGNORE    - parameters excluded from optimization (e.g. omegas in M8a)
    #   OUTTREE   - name of the optimized output tree
    #   OUTPARAMS - name of the output file summarizing parameters
    #   BACKUP    - name of the optimization log/backup file

    # Same directory-creation style as bppBranch, without spawning a shell.
    outSite = outDir + "bpp_site/"
    if not os.path.exists(outSite):
        os.makedirs(outSite)

    # Output file names, one dictionary per kind of file, keyed by model.
    outFileName = outSite + baseName
    dModelTrees = {
        model: outFileName + "_" + model + ".dnd"
        for model in lModels
    }
    dModelParams = {
        model: outFileName + "_" + model + ".params"
        for model in lModels
    }
    dModelLog = {
        model: outFileName + "_optimization_" + model
        for model in lModels
    }
    # dictionary model name - [MODEL name, MODEL arguments for bppml]
    dModelSyntax = {
        model: ["YNGP_" + model, "frequencies=F3X4(initFreqs=observed)"]
        for model in lModels if model[0] == "M"
    }
    dModelSyntax.update({
        model: [
            model[:5], "protmodel=JTT92",
            "frequencies=F3X4(initFreqs=observed)", "p0=1"
        ]
        for model in lModels if model[:5] == "DFP07"
    })
    dLogLlh = {}  # dictionary(model:logllh)

    # Equivalences of specific parameter names between models, used to seed a
    # model from a previously optimized one
    # (omega from M0->M1->M2->M7 & M0->DFP07). Never modified, so built once.
    dequiv = {
        "omega": {
            "M1": {
                "YNGP_M1.omega": "omega"
            },
            "M2": {
                "YNGP_M2.omega0": "omega"
            },
            "M0": {
                "YN98.omega": "omega"
            },
            "M7": {
                "YNGP_M7.p": "[omega/(1-omega),1][omega==1]",
                "YNGP_M7.q": "1"
            },
            "M8": {
                "YNGP_M8.p": "[omega/(1-omega),1][omega==1]",
                "YNGP_M8.q": "1"
            },
            "DFP07_0": {
                "DFP07.omega": "omega"
            },
            "DFP07": {
                "DFP07.omega": "omega",
                "DFP07.p0": "0.1"  # 0.1 to avoid optim stuck at p0=1
            }
        }
    }

    for model in lModels:
        # take into account the specificities of each model (number of classes n for example)
        if model in ("M7", "M8"):
            dModelSyntax[model].append("n=4")
        if model[0] == "M" and len(model) > 2:
            # e.g. "M8a": run YNGP_M8 with omegas fixed to 1
            dModelSyntax[model][0] = dModelSyntax[model][0][:-1]
            dModelSyntax[model].append("omegas=1")

        # Use a previous backup file to accelerate optimization.
        # prevmodel is reset for every model so that it is never unbound nor
        # inherited from the previous iteration when a backup already exists.
        prevmodel = ""
        dnewpar = {}

        if not os.path.exists(dModelLog[model]):
            if model[0] == "M":
                lCandidates = ["M7", "M2", "M1", "M0"]
            elif model[:5] == "DFP07":
                lCandidates = ["DFP07_0", "M0"]
            else:
                lCandidates = []

            # first candidate that was requested and has a ".def" backup
            for candidate in lCandidates:
                if candidate in lModels and os.path.exists(
                        dModelLog[candidate] + ".def"):
                    prevmodel = candidate
                    break

            if prevmodel != "":
                logger.info("Optimization for model " + model +
                            " uses optimized parameters from model " +
                            prevmodel)
                with open(dModelLog[prevmodel] + ".def", "r") as fprev:
                    lprev = fprev.readlines()

                dprevpar = {
                    l[:l.find("=")]: l[l.find("=") + 1:]
                    for l in lprev
                }

                # first copy all parameters, renamed for the new model
                for st, val in dprevpar.items():
                    if prevmodel == "M0":
                        if model[0] == "M":
                            nst = st.replace("YN98", "YNGP_" + model)
                        else:
                            nst = st.replace("YN98", "DFP07")
                    else:
                        nst = st.replace(prevmodel, model)

                    if nst not in dnewpar:
                        dnewpar[nst] = val

                # And then for specific parameters
                for key, par in dequiv.items():
                    if model in par and prevmodel in par:
                        parav = par[prevmodel]
                        parap = par[model]
                        for oname, oval in dprevpar.items():
                            ## look which oname is in equivalence list
                            for kparav in parav:
                                if oname.startswith(kparav + "_"):
                                    for npar, nexp in parap.items():
                                        nname = oname.replace(kparav, npar)
                                        # SECURITY NOTE: eval() runs an
                                        # expression built from the backup
                                        # file content; only safe as long as
                                        # that file is produced by bppml.
                                        nval = str(
                                            eval(
                                                nexp.replace(key,
                                                             oval).strip()))
                                        if nname not in dnewpar:
                                            dnewpar[nname] = nval

                # write in backup file
                if len(dnewpar) != 0:
                    with open(dModelLog[model], "w") as fnew:
                        for k, v in dnewpar.items():
                            fnew.write(k + "=" + v.strip() + "\n")

        # if M0 optimization in models, use tree optimized in M0 for subsequent model optimizations
        lignore = []
        if model != "M0" and "M0" in lModels:
            treeFile = dModelTrees["M0"] + "_1"
            lignore.append("BrLen")

        if model == "M8a":
            lignore.append("YNGP_M8.omegas*")

        if model == "DFP07_0":
            lignore.append("DFP07.p0_1")

        # do not re-optimize root & equilibrium if done before
        if prevmodel != "":
            logger.info("Optimization for model " + model +
                        " does not re-optimize root frequencies")
            lignore.append("Ancient")

            logger.info("Optimization for model " + model +
                        " does not re-optimize equilibrium frequencies")
            lignore.append("*_Full.theta*")
        ignore = ",".join(lignore)

        # create dictionary with all elements of the two argument lists to build commands
        modelDesc = dModelSyntax[model][0] + "(" + ",".join(
            dModelSyntax[model][1:]) + ")"
        dBppCmd = {
            "INPUTFILE": alnFile,
            "FORMAT": alnFormat,
            "TREEFILE": treeFile,
            "MODEL": modelDesc,
            "NODES": nodes,
            "IGNORE": ignore,
            "OUTTREE": dModelTrees[model],
            "OUTPARAMS": dModelParams[model],
            "BACKUP": dModelLog[model],
            "param": bppFile
        }

        # running bppml
        logger.info("Running {:s} optimization".format(model))

        # join each couple of the cmd dictionary so that it reads "k1=v1" "k2=v2" etc...
        argsMx = "\"" + "\" \"".join([k + "=" + v
                                      for k, v in dBppCmd.items()]) + "\""
        logger.debug("bppml " + argsMx)
        # The output was never read: discard it instead of piping it, since
        # wait() on an unread PIPE can block once the pipe buffer is full.
        subprocess.Popen("bppml " + argsMx,
                         shell=True,
                         stdout=subprocess.DEVNULL).wait()

        # fill dictionary with loglikelihoods of each model
        # (value after "= " on the first line of the params file)
        if os.path.exists(dModelParams[model]):
            with open(dModelParams[model], "r") as params:
                dLogLlh[model] = float(
                    params.readline().strip().split("= ")[-1])
            logger.info("Log Likelihood = {}".format(dLogLlh[model]))
        else:
            logger.info(
                "Possible failed optimization, likelihood has not been calculated."
            )

    # perform LRT
    # Each test needs BOTH models requested and BOTH likelihoods computed
    # ('"A" and "B" in x' would only test "B").
    # M1 vs M2
    if "M1" in lModels and "M2" in lModels:
        if "M1" in dLogLlh and "M2" in dLogLlh:
            LR12, p12 = PSPFunc.LRT(dLogLlh["M1"], dLogLlh["M2"], 2)
            logger.info("LRT of M1 vs M2: {}".format(p12))
        else:
            logger.info(
                "Possible failed optimization, likelihoods of M1 and M2 have not been computed."
            )
    # M7 vs M8
    if "M7" in lModels and "M8" in lModels:
        if "M7" in dLogLlh and "M8" in dLogLlh:
            LR78, p78 = PSPFunc.LRT(dLogLlh["M7"], dLogLlh["M8"], 2)
            logger.info("LRT of M7 vs M8: {}".format(p78))
        else:
            logger.info(
                "Possible failed optimization, likelihoods of M7 and M8 have not been computed."
            )
    # M8a vs M8
    if "M8" in lModels and "M8a" in lModels:
        if "M8" in dLogLlh and "M8a" in dLogLlh:
            LR88a, p88a = PSPFunc.LRT(dLogLlh["M8a"], dLogLlh["M8"], 1)
            ts88a = 0.5 * p88a + 0.5
            logger.info("LRT of M8 vs M8a: {} (Treshold: {})".format(
                p88a, ts88a))
        else:
            logger.info(
                "Possible failed optimization, likelihoods have not been computed."
            )
    # DFP07_0 vs DFP07
    if "DFP07" in lModels and "DFP07_0" in lModels:
        if "DFP07" in dLogLlh and "DFP07_0" in dLogLlh:
            LRDFP, pDFP = PSPFunc.LRT(dLogLlh["DFP07_0"], dLogLlh["DFP07"], 1)
            tsDFP = 0.5 * pDFP + 0.5
            logger.info("LRT of DFP07 vs DFP07_0: {} (Treshold: {})".format(
                pDFP, tsDFP))
        else:
            logger.info(
                "Possible failed optimization, likelihoods have not been computed."
            )

    # Bppmixedlikelihoods
    # Variables handed to bppmixedlikelihoods:
    #   INPUTFILE - alignment file
    #   FORMAT    - format of the alignment file
    #   TREEFILE  - tree file for the analyzed alignment
    #   params    - .params file from model optimization (bppml)
    #   OUTINFO   - name of the results file (info about sites etc.)

    # dictionary(model:results file name)
    dModelResults = {
        model: outSite + baseName + "_results_" + model + ".log"
        for model in lModels
    }

    for model in lModels:
        # use tree optimized in M0 for each model
        if "M0" in lModels:
            treeFile = dModelTrees["M0"] + "_1"
        else:
            treeFile = dModelTrees[model] + "_1"

        # no mixed likelihoods run for these two models
        if model in ["M0", "DFP07_0"]:
            continue

        dMixCmd = {
            "INPUTFILE": alnFile,
            "FORMAT": alnFormat,
            "TREEFILE": treeFile,
            "params": dModelParams[model],
            "OUTINFO": dModelResults[model],
            "param": bppMixed
        }

        logger.info("Running mixed likelihoods with model {:s}".format(model))
        argsMx = "\"" + "\" \"".join([k + "=" + v
                                      for k, v in dMixCmd.items()]) + "\""
        logger.debug("bppmixedlikelihoods " + argsMx)
        subprocess.Popen("bppmixedlikelihoods " + argsMx,
                         shell=True,
                         stdout=subprocess.DEVNULL).wait()

Example #2

Show file

def pspAnalysis(data, parms, aln, tree):
    """
    Procedure which executes the functions of the positive selection step.

    @param data: basicData object (its attributes o, baseName and alnFormat
                 are read here)
    @param parms: dictionary of parameters; the keys "models", "paml",
                  "bppml", "mixedlikelihood", "busted", "meme", "opb" and
                  "gnh" are read
    @param aln: alignment file
    @param tree: tree file
    @return outDir: time-stamped directory where results are written
    """
    logger = logging.getLogger("main.positiveSelection")
    dCtrls, lModels = PSPFunc.getParams(parms["models"], parms["paml"],
                                        parms["bppml"],
                                        parms["mixedlikelihood"],
                                        parms["busted"], parms["meme"],
                                        parms["opb"], parms["gnh"])
    timeStamp = strftime("%Y%m%d%H%M", localtime())

    outDir = data.o + "positive_selection_results_" + timeStamp + "/"
    if not os.path.exists(outDir):
        os.makedirs(outDir)

    logger.info(":" + tree + ":")

    cladoFile = PSPFunc.supBoot(outDir, data.baseName, tree, logger)

    ### Terminal output for user
    logger.info("Output directory: {:s}".format(outDir))
    logger.info("Alignement: {:s}".format(aln))
    logger.info("Alignement is in {:s} format.".format(data.alnFormat))
    logger.info("Tree: {:s}".format(tree))

    ### Run the different analysis as determined by control file
    logger.info("Starting positive selection analyses.")
    logger.info("POSITIVE SELECTION ANALYSIS: ")
    logger.info("Analysis to be run:")

    dAnalysis = {
        "paml": "Site (codeml)",
        "BUSTED": "Whole-Gene",
        "bppml": "Site (Bio++ - Optimization)",
        "bppmixedlikelihood": "Site (Bio++ - Results)",
        "OPB": "Branch",
        "GNH": "Branch-site on positively selected branches",
        "MEME": "Branch-site"
    }
    for key in dCtrls:
        # fall back on the raw key rather than crash on an unlisted analysis
        logger.info(dAnalysis.get(key, key))

    if "BUSTED" in dCtrls:
        GeneAnalysis.hyphyBusted(aln, cladoFile, outDir, data.baseName, logger)

    if "MEME" in dCtrls:
        try:
            BranchAnalysis.memeBranchSite(aln, cladoFile, outDir,
                                          data.baseName, logger)
        except Exception:
            # best effort: keep going, but record the traceback
            logger.exception("MEME encountered an unexpected error, skipping.")

    if "bppml" in dCtrls:
        # without a dedicated mixed-likelihood file, reuse the bppml one
        if not dCtrls.get("bppmixedlikelihood"):
            dCtrls["bppmixedlikelihood"] = dCtrls["bppml"]
        SiteAnalysis.bppSite(dCtrls["bppml"], dCtrls["bppmixedlikelihood"],
                             aln, data.alnFormat, tree, lModels, outDir,
                             data.baseName, logger)

    # NOTE(review): lPSNodes is never filled in this function, so the GNH
    # step below cannot run as written - confirm where the positively
    # selected nodes are supposed to come from.
    lPSNodes = []
    if "OPB" in dCtrls:
        BranchAnalysis.bppBranch(dCtrls["OPB"], outDir, data.baseName, aln,
                                 data.alnFormat, tree, logger)

    # both analyses must be requested ('"OPB" and "GNH" in x' only tests "GNH")
    if "OPB" in dCtrls and "GNH" in dCtrls and len(lPSNodes) > 1:
        BranchAnalysis.bppBranchSite(dCtrls["GNH"], lPSNodes, outDir,
                                     data.baseName, aln, data.alnFormat, tree,
                                     logger)

    if "paml" in dCtrls:
        SiteAnalysis.pamlSite(aln, tree, lModels, dCtrls["paml"], outDir,
                              data.baseName, logger)

    logger.info("Finished positive selection analyses.")
    return outDir

Example #3

Show file

def paramDef(params, inf, queryName, outdir):
    """
    Read the parameter file, check the parameters and complete them with
    default values.

    @param params: path of the parameter file ("key: value" lines, lines
                   starting with "#" are ignored)
    @param inf: comma-separated input file(s) from the command line, takes
                priority over the "infile" entry of the file when not ""
    @param queryName: query name from the command line, takes priority over
                      the file when not ""
    @param outdir: output directory from the command line, takes priority
                   over the file when not ""
    @return defaultParam: dictionary of parameters
    """

    params = params.strip()
    if not os.path.exists(params):
        print("The provided parameter file does not exist, try again.")
        sys.exit()

    #Parsing
    lParams = [
        "infile", "queryName", "queryFile", "blastdb", "outdir", "logfile",
        "evalue", "mincov", "percID", "maxLen", "step", "remote", "entryQuery",
        "sptree", "APIKey", "phymlOpt", "recombination", "duplication",
        "LBopt", "nbspecies", "positiveSelection", "basename", "hyphySeuil",
        "busted", "meme", "models", "paml", "bppml", "mixedlikelihood", "opb",
        "gnh"
    ]

    dParams = {}
    with open(params, mode="r", encoding="utf-8") as content:
        for line in content:
            if line.startswith("#"):
                continue
            # Split on the first ":" only: the value may itself contain ":"
            # and a line without ":" yields an empty value instead of failing.
            key, _, value = line.partition(":")
            key = key.strip()
            if key == "":
                continue
            if key not in lParams:
                print(key + " is not a valid parameter.\n")
            else:
                dParams[key] = value.strip()

    #If infile(s) given through command line, takes priority
    if inf != "":
        dParams["infile"] = list(map(str.strip, inf.split(",")))
    else:
        dParams["infile"] = list(map(str.strip, dParams["infile"].split(",")))

    #Idem queryName (value of the file kept untouched otherwise)
    if queryName != "":
        dParams["queryName"] = queryName

    #Idem outdir
    if outdir != "":
        dParams["outdir"] = outdir

    #If list of file given, split and check what each file is
    if len(dParams["infile"]) > 1:
        for entryfile in dParams["infile"]:
            if FormatFunc.isCCDSFasta(entryfile):
                dParams["queryFile"] = os.path.abspath(entryfile)
            if FormatFunc.isAln(entryfile):
                dParams["alnfile"] = os.path.abspath(entryfile)
            if FormatFunc.isTree(entryfile):
                dParams["treefile"] = os.path.abspath(entryfile)
    else:
        dParams["queryFile"] = os.path.abspath(dParams["infile"][0])

    if dParams.get("queryFile", "") != "":
        dParams["infile"] = dParams["queryFile"]
    elif dParams.get("alnfile", "") != "":
        dParams["infile"] = dParams["alnfile"]

    answers = ["Y", "YES", "T", "TRUE"]
    negAnswers = ["N", "NO", "F", "FALSE"]

    # Turn yes/no-like strings into booleans (lists are left alone).
    for param in list(dParams):
        value = dParams[param]
        if isinstance(value, str):
            if value.upper() in answers:
                dParams[param] = True
            elif value.upper() in negAnswers:
                dParams[param] = False

    #Check if parameters are correct
    lSteps = [
        "blast", "accessions", "fasta", "orf", "alignment", "tree",
        "duplication", "recombination", "positiveSelection", ""
    ]

    # .get(): a missing "step" must fall back on blast, not fail while
    # building the message.
    step = dParams.get("step", "")
    if "step" not in dParams or step not in lSteps:
        print("Step \"" + str(step) +
              "\" not available, set to blast by default.")
        dParams["step"] = "blast"
    if dParams["step"] == "":
        dParams["step"] = "blast"

    if "remote" not in dParams or dParams["remote"] == "":
        print("Remote option needs to be a boolean, set to True by default.")
        dParams["remote"] = True

    if "positiveSelection" not in dParams:
        print(
            "Positive selection analyses will not be executed, set to False by default."
        )
        dParams["positiveSelection"] = False

    elif dParams["positiveSelection"]:
        if dParams["step"] == "positiveSelection":
            if "treefile" not in dParams or dParams["treefile"] == "":
                print(
                    "The pipeline requires a phylogenetic tree. Please provide one."
                )
                sys.exit()
            elif "alnfile" not in dParams or dParams["alnfile"] == "":
                print(
                    "The pipeline requires a codon alignment. Please provide one."
                )
                sys.exit()
        for opt in [
                "meme", "busted", "models", "paml", "bppml", "mixedlikelihood",
                "opb", "gnh"
        ]:
            if opt not in dParams:
                dParams[opt] = ""

            elif opt not in ["meme", "busted", "models", "paml"]:
                # Either an existing parameter file is given, or the option
                # is simply enabled and a parameter file is created next to
                # the input file.
                if type(dParams[opt]) is not bool and os.path.exists(
                        dParams[opt].strip("\n")):
                    dParams[opt] = dParams[opt].strip("\n")
                elif dParams[opt]:
                    path = "/".join(dParams["infile"].split("/")
                                    [:-1]) + "/" + opt + "_params.bpp"
                    PSPFunc.pspFileCreation(path, opt)
                    dParams[opt] = path

            elif opt == "models":
                # keep only the known model names, in the given order
                ltemp = []
                for M in map(str.strip, dParams[opt].split(",")):
                    if M == "":
                        continue
                    if M not in [
                            "M0", "M1", "M2", "M7", "M8", "M8a", "DFP07_0",
                            "DFP07"
                    ]:
                        print(M + " isn't a valid model.")
                    else:
                        ltemp.append(M)
                dParams[opt] = ",".join(ltemp)

    elif dParams["step"] == "positiveSelection":
        print(
            "Error: positiveSelection option set to false and step set to positiveSelection."
        )
        sys.exit()

    if dParams["step"] in ["blast", "accessions", "fasta"]:
        # a missing blastdb is reported like an empty one
        if dParams["infile"] == "" or dParams.get("blastdb", "") == "":
            print("Infile and Blastdb are necessary.")
            sys.exit()

    #Creation of a dictionary with all the parameters
    defaultParam = {
        "infile": "",
        "queryName": "",
        "queryFile": "",
        "blastdb": "",
        "outdir": "",
        "logfile": "",
        "evalue": 1e-3,
        "mincov": 50,
        "percID": 70,
        "maxLen": "cutoff",
        "entryQuery": "",
        "APIKey": "",
        "phymlOpt": "",
        "sptree": "",
        "duplication": False,
        "LBopt": "cutoff",
        "nbspecies": 8,
        "recombination": False,
        "remote": False,
        "step": "blast",
        "positiveSelection": False,
        "alnfile": "",
        "treefile": "",
        "alnformat": "Fasta",
        "basename": "",
        "hyphySeuil": 0.05,
        "busted": False,
        "meme": False,
        "models": "",
        "paml": "",
        "bppml": "",
        "mixedlikelihood": "",
        "opb": False,
        "gnh": False
    }

    # values read from the file/command line override the defaults
    for i in defaultParam:
        if i in dParams and dParams[i] != "":
            defaultParam[i] = dParams[i]

    return defaultParam

Example #4

Show file

def bppBranch(OPBFile, outDir, baseName, alnFile, alnFormat, treeFile, logger):
    """
    Branch analysis with Bio++ (One Per Branch): optimize a YNGP_M2 model per
    branch with bppml, then re-run bppml for selected branches with
    theta2/omega2 set to 1 and compare both fits with PSPFunc.LRT.

    @param OPBFile: parameter file passed to bppml ("param=")
    @param outDir: output directory, expected to end with "/"
    @param baseName: prefix of every output file name
    @param alnFile: alignment file (INPUTFILE)
    @param alnFormat: format of the alignment file (FORMAT)
    @param treeFile: tree file (TREEFILE)
    @param logger: logging object
    @return outParams: name of the parameter file written by the first run
    """
    ### BRANCH ANALYSIS: BIO++ ONE PER BRANCH

    logger.info("One Per Branch (BIO++)")
    logger.info("OPB parameter file: {:s}".format(OPBFile))

    outOPB = outDir + "bpp_branch/"
    if not os.path.exists(outOPB):
        os.makedirs(outOPB)

    model = "M2"

    outFileName = outOPB + baseName
    outTree = outFileName + "_" + model + ".dnd"
    outParams = outFileName + "_" + model + ".params"
    outBackup = outFileName + "_optimization_" + model

    # create dictionary with all elements of the two argument lists to build commands
    dBppCmd = {
        "INPUTFILE":
        alnFile,
        "FORMAT":
        alnFormat,
        "TREEFILE":
        treeFile,
        "OUTTREE":
        outTree,
        "OUTPARAMS":
        outParams,
        "BACKUP":
        outBackup,
        "model1":
        "YNGP_" + model + "(frequencies=F3X4(initFreqs=observed))",
        "param":
        OPBFile,
        "process1":
        "OnePerBranch(model=1, tree=1, rate=1, root_freq=1, shared_parameters=(*kappa, *Full.theta*))"
    }
    # running bppml
    logger.info("Running Branch optimization")

    ### look for previous M0 optim (files written by the site analysis)
    outSite = outDir + "bpp_site/"
    outSiteFileName = outSite + baseName

    lModels = ["M0", "M2"]
    dPrevModelLog = {
        prev: outSiteFileName + "_optimization_" + prev
        for prev in lModels
    }
    prevmodel, dnewpar = getNewParfromOptim(model, lModels, dPrevModelLog,
                                            logger)
    if prevmodel != "":
        # seed the backup file with the parameters of the previous model
        with open(outBackup, "w") as fnew:
            for k, v in dnewpar.items():
                fnew.write(k + "=" + v.strip() + "\n")
    lignore = setIgnoreParams(model, prevmodel, lModels, logger)

    dBppCmd["IGNORE"] = ",".join(lignore)

    # join each couple of the cmd dictionary so that it reads "k1=v1" "k2=v2" etc...
    argsOPB = "bppml \"" + "\" \"".join(
        [k + "=" + v for k, v in dBppCmd.items()]) + "\""
    logger.debug(argsOPB)
    cmd(argsOPB, False)

    # test each branch
    # Scan all parameter names ("name=value" lines of the ".def" backup)
    with open(dBppCmd["BACKUP"] + ".def", "r") as fback:
        dparam = {
            param.split("=")[0]: float(param.split("=")[1])
            for param in fback.readlines()
        }

    valM2 = float(dparam["f(x)"])

    # cp outParams for each branch with M2 replaced with M1
    # only for the where theta1 < 0.999 & theta2 < 0.999

    with open(outParams, "r") as fparam:
        lcmd = fparam.readlines()

    ## Look for correspondance  model_nb <-> node_id
    lprocess = [l for l in lcmd if l[:7] == "process"][0]
    lid = lprocess.split(".nodes_id=(")
    cormodid = {}
    # NOTE(review): pieces are consumed two by two, so only every other
    # ".nodes_id=(" occurrence is paired, and an odd number of pieces would
    # index past the end - confirm against the layout of the process line
    # written by bppml.
    for i in range(0, len(lid), 2):
        # model number: what follows the last "l" of the piece
        mod = int(lid[i][lid[i].rfind("l") + 1:])
        # node id: start of the next piece, up to the closing parenthesis
        idi = int(lid[i + 1][:lid[i + 1].find(")")])
        cormodid[idi] = mod

    ## Compute lk for each node with theta1_mod * theta2_mod < 0.999:
    # these two entries are not passed to the per-branch runs
    del dBppCmd["process1"]
    del dBppCmd["model1"]
    with open(outFileName + "_branch.txt", "w") as fresbranch:
        fresbranch.write("Id\tomega2\tprop\tM2\tM1\tLR\tp\n")
        for idi, mod in cormodid.items():
            theta1Key = "YNGP_M2.theta1_%d" % mod
            theta2Key = "YNGP_M2.theta2_%d" % mod
            omega2Key = "YNGP_M2.omega2_%d" % mod

            if dparam[theta2Key] * dparam[theta1Key] >= 0.999:
                continue

            # backup for this branch: every optimized value except theta2,
            # followed by theta2 and omega2 set to 1
            with open(outBackup + "_%d" % mod, "w") as fback:
                for key, val in dparam.items():
                    if key != theta2Key:
                        fback.write(key + "=" + str(val) + "\n")
                fback.write("YNGP_M2.theta2_%d=1\n" % mod)
                fback.write("YNGP_M2.omega2_%d=1\n" % mod)

            # only theta1 and omega0 of this branch are left out of IGNORE
            lignore2 = lignore[:] + [
                key for key in dparam
                if key not in [theta1Key, "YNGP_M2.omega0_%d" % mod]
            ]
            dBppCmd["IGNORE"] = ",".join(lignore2)
            dBppCmd["params"] = outParams
            dBppCmd["OUTPARAMS"] = outParams + "_%d" % idi
            dBppCmd["BACKUP"] = outBackup + "_%d" % mod

            argsOPB = "bppml \"" + "\" \"".join(
                [k + "=" + v for k, v in dBppCmd.items()]) + "\""
            cmd(argsOPB, False)

            with open(dBppCmd["BACKUP"] + ".def", "r") as fback:
                dparam2 = {
                    param.split("=")[0]: float(param.split("=")[1])
                    for param in fback.readlines()
                }

            valM1 = float(dparam2["f(x)"])
            LR, p = PSPFunc.LRT(valM2, valM1, 2)
            fresbranch.write(
                "%d\t%f\t%f\t%f\t%f\t%f\t%f\n" %
                (idi, dparam[omega2Key],
                 (1 - dparam[theta2Key]) * (1 - dparam[theta1Key]), valM2,
                 valM1, LR, p))
            if p < 0.05:
                logger.info("Node {:d} is interesting (w = {:f})".format(
                    idi, dparam[omega2Key]))
    return outParams

Example #5

Show file

def _runBppCommand(tool, dCmd, logger):
    """Run a Bio++ executable with ``key=value`` arguments through the shell.

    tool   -- name of the executable (for example "bppml")
    dCmd   -- dictionary mapping argument names to string values
    logger -- logger receiving the command line and the captured output

    Returns the exit code of the process.
    """
    # join each couple of the cmd dictionary so that it reads "k1=v1" "k2=v2" etc...
    args = "\"" + "\" \"".join([k + "=" + v for k, v in dCmd.items()]) + "\""
    logger.debug(tool + " " + args)
    proc = subprocess.Popen(tool + " " + args,
                            shell=True,
                            stdout=subprocess.PIPE)
    # communicate() drains stdout while waiting; calling wait() on a piped
    # process can deadlock once the child has filled the OS pipe buffer.
    stdout, _ = proc.communicate()
    logger.debug(stdout.decode(errors="replace"))
    return proc.returncode


def bppSite(bppFile, bppMixed, alnFile, alnFormat, treeFile, lModels, outDir,
            baseName, logger):
    """Run the Bio++ site analysis: bppml optimization, LRTs, mixed likelihoods.

    For every model of lModels, bppml is launched with the following variables:
        INPUTFILE - alignment file (alnFile)
        FORMAT - format of the alignment file (alnFormat)
        TREEFILE - tree file for the analyzed alignment
        MODEL - model description built from the model name
        IGNORE - parameters to ignore for optimization
        OUTTREE - name of the optimized output tree
        OUTPARAMS - name of the output file summarizing parameters
        BACKUP - name of the optimization log file
    Then likelihood ratio tests are logged for the pairs M1/M2, M7/M8,
    M8a/M8 and DFP07_0/DFP07 when both models of a pair were requested, and
    bppmixedlikelihoods is launched for every model except M0 and DFP07_0.

    bppFile   -- parameter file handed to bppml as "param"
    bppMixed  -- parameter file handed to bppmixedlikelihoods as "param"
    alnFile   -- alignment file
    alnFormat -- format of the alignment file
    treeFile  -- starting tree; once M0 is part of lModels, the other models
                 use the tree file "<M0 output tree>_1" instead
    lModels   -- list of model names (names starting with "M" or "DFP07")
    outDir    -- output directory; "bpp_site/" is appended by plain string
                 concatenation, so it must end with a path separator
    baseName  -- prefix of every output file name
    logger    -- logger used for progress and debug messages

    All results are written under outDir + "bpp_site/" and reported through
    logger.
    """
    logger.info(os.getcwd())
    ### SITE ANALYSIS: BIO++
    logger.info("Bio++ Site Analysis")
    logger.info("Models to be run: {:s}".format(", ".join(lModels)))
    logger.info("Bppml parameter file: {:s}".format(bppFile))

    # Output directory: created in-process (no shell), parents included
    outSite = outDir + "bpp_site/"
    if not os.path.exists(outSite):
        os.makedirs(outSite)

    # Output file names - dictionaries that associate model number with output file name for the model
    outFileName = outSite + baseName
    dModelTrees = {
        model: outFileName + "_" + model + ".dnd"
        for model in lModels
    }
    dModelParams = {
        model: outFileName + "_" + model + ".params"
        for model in lModels
    }
    dModelLog = {
        model: outFileName + "_optimization_" + model
        for model in lModels
    }
    # dictionary(model:results file name) for bppmixedlikelihoods
    dModelResults = {
        model: outFileName + "_results_" + model + ".log"
        for model in lModels
    }
    dModelSyntax = {
        model: ["YNGP_" + model, "frequencies=F3X4(initFreqs=observed)"]
        for model in lModels if model[0] == "M"
    }  # dictionary model number - [MODEL name, MODEL arguments for bppml]
    dModelSyntax.update({
        model:
        [model[:5], "protmodel=JTT92", "frequencies=F3X4(initFreqs=observed)"]
        for model in lModels if model[:5] == "DFP07"
    })
    # take into account the specificities of each model (number of classes n for example)
    for model in lModels:
        if model in ["M7", "M8"]:
            dModelSyntax[model].append("n=4")
            dModelSyntax[model].append("q=1")
        if model[0] == "M" and len(model) > 2:
            # three-letter names such as "M8a": drop the trailing letter from
            # the model name and fix omegas to 1
            dModelSyntax[model][0] = dModelSyntax[model][0][:-1]
            dModelSyntax[model].append("omegas=1")
        if model[:5] == "DFP07":
            dModelSyntax[model].append(
                "p0=0.1" if model == "DFP07" else "p0=1")
    dLogLlh = {}  # dictionary(model:logllh)

    ## Bppml
    for model in lModels:
        prevmodel, dnewpar = getNewParfromOptim(model, lModels, dModelLog,
                                                logger)
        if prevmodel != "":
            # seed the backup file of this model with the parameters obtained
            # from the previous model
            with open(dModelLog[model], "w") as fnew:
                for k, v in dnewpar.items():
                    fnew.write(k + "=" + v.strip() + "\n")

            lignore = setIgnoreParams(model, prevmodel, lModels, logger)
            ignore = ",".join(lignore)
        else:
            ignore = ""

        if model != "M0" and "M0" in lModels:
            treeFile = dModelTrees["M0"] + "_1"

        # create dictionary with all elements of the two argument lists to build commands
        modelDesc = dModelSyntax[model][0] + "(" + ",".join(
            dModelSyntax[model][1:]) + ")"
        dBppCmd = {
            "INPUTFILE": alnFile,
            "FORMAT": alnFormat,
            "TREEFILE": treeFile,
            "MODEL": modelDesc,
            "IGNORE": ignore,
            "OUTTREE": dModelTrees[model],
            "OUTPARAMS": dModelParams[model],
            "BACKUP": dModelLog[model],
            "param": bppFile
        }

        # running bppml
        logger.info("Running {:s} optimization".format(model))
        _runBppCommand("bppml", dBppCmd, logger)

        # fill dictionary with loglikelihoods of each model
        # (value after the last "= " on the first line of the params file)
        if os.path.exists(dModelParams[model]):
            with open(dModelParams[model], "r") as params:
                dLogLlh[model] = float(
                    params.readline().strip().split("= ")[-1])
            logger.info("Log Likelihood = {}".format(dLogLlh[model]))
        else:
            logger.info(
                "Possible failed optimization, likelihood has not been calculated."
            )

    # perform LRT
    # (first model, second model, third argument of PSPFunc.LRT,
    #  success message, also log the threshold?, failure message)
    lLRT = (
        ("M1", "M2", 2, "LRT of M1 vs M2: {}", False,
         "Possible failed optimization, likelihoods of M1 and M2 have not been computed."
         ),
        ("M7", "M8", 2, "LRT of M7 vs M8: {}", False,
         "Possible failed optimization, likelihoods of M7 and M8 have not been computed."
         ),
        ("M8a", "M8", 1, "LRT of M8 vs M8a: {} (Treshold: {})", True,
         "Possible failed optimization, likelihoods have not been computed."),
        ("DFP07_0", "DFP07", 1,
         "LRT of DFP07 vs DFP07_07: {} (Treshold: {})", True,
         "Possible failed optimization, likelihoods have not been computed."),
    )
    for first, second, df, okMsg, withThreshold, failMsg in lLRT:
        # Both models must have been requested. The former test
        # `"A" and "B" in x` only looked at "B".
        if first not in lModels or second not in lModels:
            continue
        if first in dLogLlh and second in dLogLlh:
            LR, p = PSPFunc.LRT(dLogLlh[first], dLogLlh[second], df)
            if withThreshold:
                logger.info(okMsg.format(p, 0.5 * p + 0.5))
            else:
                logger.info(okMsg.format(p))
        else:
            logger.info(failMsg)

    ## Bppmixedlikelihoods
    # Variables handed to bppmixedlikelihoods:
    #     INPUTFILE - alignment file
    #     FORMAT - format of the alignment file
    #     TREEFILE - tree file for the analyzed alignment
    #     params - .params file from model optimization (bppml)
    #     OUTINFO - name of the results file (info about sites etc.)
    for model in lModels:
        # use tree optimized in M0 for each model
        if "M0" in lModels:
            treeFile = dModelTrees["M0"] + "_1"
        else:
            treeFile = dModelTrees[model] + "_1"

        if model in ["M0", "DFP07_0"]:
            continue

        dMixCmd = {
            "INPUTFILE": alnFile,
            "FORMAT": alnFormat,
            "TREEFILE": treeFile,
            "params": dModelParams[model],
            "OUTINFO": dModelResults[model],
            "param": bppMixed
        }

        logger.info("Running mixed likelihoods with model {:s}".format(model))
        _runBppCommand("bppmixedlikelihoods", dMixCmd, logger)