Esempio n. 1
0
    def __init__(self, nodeid, alg_fasta_file, alg_phylip_file,
                 constrain_tree, conf, confname):
        """Set up a Prottest model-selection task.

        Registers the PhyML citation, stores the alignment paths and the
        configuration on the instance, builds the base argument dict and
        passes it to ``ModelTesterTask.__init__`` (task type "mchooser"),
        then calls ``self.init()``.

        ``constrain_tree`` is accepted but not used by this constructor.

        Raises:
            ValueError: if ``conf[confname]["_lk_mode"]`` is neither
                "raxml" nor "phyml".
        """
        GLOBALS["citator"].add(PHYML_CITE)

        self.alg_fasta_file = alg_fasta_file
        self.alg_phylip_file = alg_phylip_file
        self.conf = conf
        self.confname = confname

        section = conf[confname]
        self.lk_mode = section["_lk_mode"]
        if self.lk_mode not in ("raxml", "phyml"):
            raise ValueError("Choose a valid lk_mode value (raxml or phyml)")
        # Value passed as "-o": "n" in raxml mode, "lr" in phyml mode.
        optimize_flag = "n" if self.lk_mode == "raxml" else "lr"

        base_args = {
            "--datatype": "aa",
            "--input": self.alg_phylip_file,
            "--bootstrap": "0",
            "-o": optimize_flag,
            # Placeholder: the model is filled in per job when jobs are
            # created.
            "--model": None,
            "--quiet": "",
        }

        self.models = section["_models"]
        task_name = "Prottest-[%s]" % (','.join(self.models),)
        ModelTesterTask.__init__(self, nodeid, "mchooser", task_name,
                                 base_args, section)

        self.seqtype = "aa"
        self.best_model = None
        self.init()
Esempio n. 2
0
    def finish(self):
        """Parse the jModelTest output and store the selected model.

        Reads the stdout file of the last job in ``self.jobs`` and picks:

        * the model named on the first "partition..." line that follows a
          "Model selected:" line (text after the first "="), and
        * the newick string on the
          "ML tree (NNI) for the best AIC model =" line, loaded as a
          ``PhyloTree``.

        The model name is written to ``self.best_model_file``.  When
        ``self.ttype`` is "tree", the parsed tree is written to
        ``self.tree_file`` and ``self.model`` is set to the model name.
        Finally ``ModelTesterTask.finish`` is called.
        """
        tree_prefix = "ML tree (NNI) for the best AIC model ="
        best_model = None
        best_model_in_next_line = False
        t = None
        # Plain "r" instead of "rU": the "U" flag is rejected by
        # Python 3.11+, and every line is stripped below anyway.  The
        # ``with`` block guarantees the handle is closed.
        with open(self.jobs[-1].stdout_file, "r") as stdout:
            for line in stdout:
                line = line.strip()
                if best_model_in_next_line and line.startswith("Model"):
                    # Lines starting with "Model" inside the selection
                    # section are skipped; the name is taken from the
                    # "partition" line instead.
                    pass
                elif best_model_in_next_line and line.startswith("partition"):
                    best_model = line.split("=")[1].strip()
                    best_model_in_next_line = False
                elif line.startswith("Model selected:"):
                    best_model_in_next_line = True
                elif line.startswith(tree_prefix):
                    nw = line.replace(tree_prefix, "")
                    t = PhyloTree(nw)

        with open(self.best_model_file, "w") as model_out:
            model_out.write(best_model)
        log.log(26, "Best model: %s" % best_model)
        if self.ttype == "tree":
            t.write(outfile=self.tree_file)
            self.model = best_model

        ModelTesterTask.finish(self)
Esempio n. 3
0
    def finish(self):
        """Collect per-model likelihoods and store the best model.

        In "phyml" mode, for every job whose ``jobcat`` is "bionj", the
        tree is loaded from ``<alg_phylip_file>_phyml_tree.txt`` and the
        log-likelihood is parsed from ``<alg_phylip_file>_phyml_stats.txt``
        (both joined onto the job directory).  In "raxml" mode, for every
        job whose ``jobcat`` is "raxml", the likelihood is the second
        whitespace-separated token of the first line of
        ``RAxML_log.<-n arg>`` and the tree is loaded from the job's
        ``-t`` argument.

        Each tree gets "lk" and "model" features.  The
        ``[lk, model, tree]`` entries are sorted in ascending order and
        the model of the last entry (highest likelihood) is passed,
        together with the sorted list, to ``ModelTesterTask.store_data``.
        """
        lks = []
        if self.lk_mode == "phyml":
            for job in self.jobs:
                if job.jobcat != "bionj":
                    continue
                tree_file = pjoin(job.jobdir,
                                  self.alg_phylip_file + "_phyml_tree.txt")
                stats_file = pjoin(job.jobdir,
                                   self.alg_phylip_file + "_phyml_stats.txt")
                tree = PhyloTree(tree_file)
                with open(stats_file) as stats:
                    stats_text = stats.read()
                # Raw string: "\s" and "\d" are regex escapes, not
                # Python string escapes.
                m = re.search(r'Log-likelihood:\s+(-?\d+\.\d+)', stats_text)
                lk = float(m.groups()[0])
                tree.add_feature("lk", lk)
                tree.add_feature("model", job.args["--model"])
                lks.append([float(tree.lk), tree.model, tree])
        elif self.lk_mode == "raxml":
            for job in self.jobs:
                if job.jobcat != "raxml":
                    continue
                log_path = pjoin(job.jobdir, "RAxML_log.%s" % job.args["-n"])
                with open(log_path) as raxml_log:
                    lk = raxml_log.readline().split()[1]
                tree = PhyloTree(job.args["-t"])
                tree.add_feature("lk", lk)
                tree.add_feature("model", job.model)
                lks.append([float(tree.lk), tree.model, tree])

        # Sort in ascending order on (lk, model) only, so that ties never
        # fall through to comparing the tree objects themselves.
        lks.sort(key=lambda entry: (entry[0], entry[1]))
        # The highest likelihood is the last entry of the list.
        best_model = lks[-1][1]
        log.log(22, "%s model selected from the following lk values:\n%s" %(best_model, '\n'.join(map(str, lks))))
        ModelTesterTask.store_data(self, best_model, lks)
Esempio n. 4
0
    def __init__(self, nodeid, alg_fasta_file, alg_phylip_file,
                 constrain_tree, conf):
        """Set up a Prottest task from the "prottest" config section.

        Stores the alignment paths (plus the basename of the phylip
        file) and the configuration, builds the base argument dict and
        passes it to ``ModelTesterTask.__init__`` (task type "mchooser",
        name "Prottest"), then calls ``self.init()`` and
        ``self.post_init()``.

        ``constrain_tree`` is accepted but not used by this constructor.

        Raises:
            ValueError: if ``conf["prottest"]["_lk_mode"]`` is neither
                "raxml" nor "phyml".
        """
        self.alg_fasta_file = alg_fasta_file
        self.alg_phylip_file = alg_phylip_file
        self.alg_basename = basename(self.alg_phylip_file)
        self.conf = conf

        section = self.conf["prottest"]
        self.lk_mode = section["_lk_mode"]
        if self.lk_mode not in ("raxml", "phyml"):
            raise ValueError("Choose a valid lk_mode value (raxml or phyml)")
        # Value passed as "-o": "n" in raxml mode, "lr" in phyml mode.
        optimize_flag = "n" if self.lk_mode == "raxml" else "lr"

        base_args = {
            "--datatype": "aa",
            "--input": self.alg_basename,
            "--bootstrap": "0",
            "-o": optimize_flag,
            # Placeholder: the model is filled in per job when jobs are
            # created.
            "--model": None,
            "--quiet": "",
        }

        ModelTesterTask.__init__(self, nodeid, "mchooser", "Prottest",
                                 base_args, section)

        self.seqtype = "aa"
        self.best_model = None
        self.models = section["_models"]
        self.init()
        self.post_init()
Esempio n. 5
0
    def __init__(self, nodeid, alg_fasta_file, alg_phylip_file, conf):
        """Set up a jModelTest task.

        Registers the jModelTest citation, reads the "jmodeltest" section
        of ``conf`` and initialises the parent ``ModelTesterTask`` with
        ``{'-d': alg_fasta_file}`` as base arguments.  The task type is
        "tree" when the section's "-t" option is "ML" (also the default
        when the option is absent) and "mchooser" otherwise.

        After ``self.init()``, ``self.best_model_file`` is set to
        ``<taskdir>/best_model.txt`` and ``self.tree_file`` to
        ``<taskdir>/final_tree.nw`` for "tree" tasks, ``None`` otherwise.
        """
        GLOBALS["citator"].add(JMODELTEST_CITE)

        self.conf = conf
        base_args = {
            '-d': alg_fasta_file,
        }
        args = self.conf["jmodeltest"]
        if args.get("-t", "ML") == "ML":
            task_type = "tree"
        else:
            task_type = "mchooser"

        # Pass the "jmodeltest" section fetched above.  The previous code
        # indexed ``self.conf`` with ``confname``, a name that is neither
        # a parameter nor a local of this constructor.
        ModelTesterTask.__init__(self, nodeid, task_type, "Jmodeltest",
                                 base_args, args)

        # set app arguments and options
        self.alg_fasta_file = alg_fasta_file
        self.alg_phylip_file = alg_phylip_file
        self.seqtype = "nt"
        self.models = "see jmodeltest params"

        self.init()
        self.best_model_file = os.path.join(self.taskdir, "best_model.txt")
        if task_type == "tree":
            self.tree_file = os.path.join(self.taskdir, "final_tree.nw")
        else:
            self.tree_file = None
Esempio n. 6
0
    def __init__(self, nodeid, alg_fasta_file, alg_phylip_file, constrain_tree,
                 conf, confname):
        """Set up a Prottest model-selection task.

        Registers the PhyML citation, keeps the alignment paths and the
        configuration on the instance, assembles the base argument dict
        and forwards it to ``ModelTesterTask.__init__`` (task type
        "mchooser"), then calls ``self.init()``.

        ``constrain_tree`` is accepted but not used by this constructor.

        Raises:
            ValueError: if ``conf[confname]["_lk_mode"]`` is neither
                "raxml" nor "phyml".
        """
        GLOBALS["citator"].add(PHYML_CITE)

        self.alg_fasta_file = alg_fasta_file
        self.alg_phylip_file = alg_phylip_file
        self.conf = conf
        self.confname = confname

        task_conf = conf[confname]
        self.lk_mode = task_conf["_lk_mode"]
        if self.lk_mode not in ("raxml", "phyml"):
            raise ValueError("Choose a valid lk_mode value (raxml or phyml)")
        # Value passed as "-o": "n" in raxml mode, "lr" in phyml mode.
        o_value = "n" if self.lk_mode == "raxml" else "lr"

        base_args = {
            "--datatype": "aa",
            "--input": self.alg_phylip_file,
            "--bootstrap": "0",
            "-o": o_value,
            # Placeholder: the model is filled in per job when jobs are
            # created.
            "--model": None,
            "--quiet": "",
        }

        self.models = task_conf["_models"]
        joined_models = ','.join(self.models)
        task_name = "Prottest-[%s]" % (joined_models,)
        ModelTesterTask.__init__(self, nodeid, "mchooser", task_name,
                                 base_args, task_conf)

        self.seqtype = "aa"
        self.best_model = None
        self.init()
Esempio n. 7
0
 def finish(self):
     """Pick the model with the highest likelihood and store the result.

     In "phyml" mode, for every job whose ``flag`` is "phyml", the tree
     is loaded from ``<alg_basename>_phyml_tree.txt`` and the
     log-likelihood is parsed from ``<alg_basename>_phyml_stats.txt``
     (both inside the job directory).  In "raxml" mode, for every job
     whose ``flag`` is "raxml", the likelihood is the second
     whitespace-separated token of the first line of
     ``RAxML_log.<-n arg>`` and the tree is loaded from the job's ``-t``
     argument.

     Each tree gets "lk" and "model" features.  The model with the
     highest likelihood is written to ``self.best_model_file`` and, when
     ``self.tree_file`` is set, its tree is written there.  Finally
     ``ModelTesterTask.finish`` is called.
     """
     lks = []
     if self.lk_mode == "phyml":
         for job in self.jobs:
             if job.flag != "phyml":
                 continue
             tree_file = os.path.join(job.jobdir,
                                      self.alg_basename + "_phyml_tree.txt")
             # Use this job's directory.  The old code read ``j.jobdir``,
             # i.e. the variable of the filtering comprehension, not the
             # job being processed.
             stats_file = os.path.join(job.jobdir,
                                       self.alg_basename + "_phyml_stats.txt")
             tree = PhyloTree(tree_file)
             with open(stats_file) as stats:
                 stats_text = stats.read()
             # Raw string: "\s" and "\d" are regex escapes.
             m = re.search(r'Log-likelihood:\s+(-?\d+\.\d+)', stats_text)
             lk = float(m.groups()[0])
             tree.add_feature("lk", lk)
             tree.add_feature("model", job.args["--model"])
             lks.append([float(tree.lk), tree.model, tree])
     elif self.lk_mode == "raxml":
         for job in self.jobs:
             if job.flag != "raxml":
                 continue
             log_path = os.path.join(job.jobdir,
                                     "RAxML_log.%s" % job.args["-n"])
             with open(log_path) as raxml_log:
                 lk = raxml_log.readline().split()[1]
             tree = PhyloTree(job.args["-t"])
             tree.add_feature("lk", lk)
             tree.add_feature("model", job.model)
             # Store the likelihood as a float so entries sort
             # numerically rather than as text.
             lks.append([float(tree.lk), tree.model, tree])

     # Highest likelihood first; sort on the likelihood only so tree
     # objects are never compared.
     lks.sort(key=lambda entry: entry[0], reverse=True)
     # choose the model with higher likelihood
     best_model = lks[0][1]
     best_tree = lks[0][2]
     with open(self.best_model_file, "w") as model_out:
         model_out.write(best_model)
     if self.tree_file:
         # Write the tree of the selected model (not whichever tree the
         # loops above handled last), passing the path as ``outfile``.
         best_tree.write(outfile=self.tree_file)
     ModelTesterTask.finish(self)
Esempio n. 8
0
    def finish(self):
        """Parse the jModelTest output and store the selected model.

        Scans the stdout file of the last job in ``self.jobs`` for:

        * the first "partition..." line after a "Model selected:" line;
          the text after its first "=" is the selected model, and
        * the "ML tree (NNI) for the best AIC model =" line, whose
          remainder is loaded as a ``PhyloTree``.

        The model name is written to ``self.best_model_file``.  When
        ``self.ttype`` is "tree", the parsed tree is written to
        ``self.tree_file`` and ``self.model`` is set to the model name.
        ``ModelTesterTask.finish`` is called at the end.
        """
        tree_prefix = "ML tree (NNI) for the best AIC model ="
        best_model = None
        best_model_in_next_line = False
        t = None
        # Plain "r" instead of "rU": the "U" flag is rejected by
        # Python 3.11+, and each line is stripped below anyway.  The
        # ``with`` block closes the handle when parsing is done.
        with open(self.jobs[-1].stdout_file, "r") as job_stdout:
            for line in job_stdout:
                line = line.strip()
                if best_model_in_next_line and line.startswith("Model"):
                    # "Model..." lines inside the selection section are
                    # skipped; the name comes from the "partition" line.
                    pass
                elif best_model_in_next_line and line.startswith("partition"):
                    best_model = line.split("=")[1].strip()
                    best_model_in_next_line = False
                elif line.startswith("Model selected:"):
                    best_model_in_next_line = True
                elif line.startswith(tree_prefix):
                    nw = line.replace(tree_prefix, "")
                    t = PhyloTree(nw)

        with open(self.best_model_file, "w") as model_out:
            model_out.write(best_model)
        log.log(26, "Best model: %s" %best_model)
        if self.ttype == "tree":
            t.write(outfile=self.tree_file)
            self.model = best_model

        ModelTesterTask.finish(self)
Esempio n. 9
0
    def __init__(self, nodeid, alg_fasta_file, alg_phylip_file, conf):
        """Set up a jModelTest task.

        Registers the jModelTest citation, reads the "jmodeltest" section
        of ``conf`` and initialises the parent ``ModelTesterTask`` with
        ``{'-d': alg_fasta_file}`` as base arguments.  The task type is
        "tree" when the section's "-t" option is "ML" (the default when
        the option is missing) and "mchooser" otherwise.

        After ``self.init()``, ``self.best_model_file`` points to
        ``<taskdir>/best_model.txt``; ``self.tree_file`` points to
        ``<taskdir>/final_tree.nw`` for "tree" tasks and is ``None``
        otherwise.
        """
        GLOBALS["citator"].add(JMODELTEST_CITE)

        self.conf = conf
        base_args = {
            '-d': alg_fasta_file,
            }
        args = self.conf["jmodeltest"]
        if args.get("-t", "ML") == "ML":
            task_type = "tree"
        else:
            task_type = "mchooser"

        # Hand over the "jmodeltest" section fetched above.  The previous
        # code indexed ``self.conf`` with ``confname``, which is neither
        # a parameter nor a local of this constructor.
        ModelTesterTask.__init__(self, nodeid, task_type, "Jmodeltest",
                                 base_args, args)

        # set app arguments and options
        self.alg_fasta_file = alg_fasta_file
        self.alg_phylip_file = alg_phylip_file
        self.seqtype = "nt"
        self.models = "see jmodeltest params"

        self.init()
        self.best_model_file = os.path.join(self.taskdir, "best_model.txt")
        if task_type == "tree":
            self.tree_file = os.path.join(self.taskdir, "final_tree.nw")
        else:
            self.tree_file = None
Esempio n. 10
0
    def __init__(self, nodeid, alg_fasta_file, alg_phylip_file, constrain_tree,
                 conf):
        """Set up a Prottest task from the "prottest" config section.

        Keeps the alignment paths (and the basename of the phylip file)
        and the configuration on the instance, assembles the base
        argument dict and forwards it to ``ModelTesterTask.__init__``
        (task type "mchooser", name "Prottest"), then calls
        ``self.init()`` followed by ``self.post_init()``.

        ``constrain_tree`` is accepted but not used by this constructor.

        Raises:
            ValueError: if ``conf["prottest"]["_lk_mode"]`` is neither
                "raxml" nor "phyml".
        """
        self.alg_fasta_file = alg_fasta_file
        self.alg_phylip_file = alg_phylip_file
        self.alg_basename = basename(self.alg_phylip_file)
        self.conf = conf

        prottest_conf = self.conf["prottest"]
        self.lk_mode = prottest_conf["_lk_mode"]
        if self.lk_mode not in ("raxml", "phyml"):
            raise ValueError("Choose a valid lk_mode value (raxml or phyml)")
        # Value passed as "-o": "n" in raxml mode, "lr" in phyml mode.
        o_value = "n" if self.lk_mode == "raxml" else "lr"

        base_args = {
            "--datatype": "aa",
            "--input": self.alg_basename,
            "--bootstrap": "0",
            "-o": o_value,
            # Placeholder: the model is filled in per job when jobs are
            # created.
            "--model": None,
            "--quiet": "",
        }

        ModelTesterTask.__init__(self, nodeid, "mchooser", "Prottest",
                                 base_args, prottest_conf)

        self.seqtype = "aa"
        self.best_model = None
        self.models = prottest_conf["_models"]
        self.init()
        self.post_init()
Esempio n. 11
0
 def finish(self):
     """Select the highest-likelihood model and store the result.

     In "phyml" mode, each job whose ``flag`` is "phyml" contributes the
     tree in ``<alg_basename>_phyml_tree.txt`` and the log-likelihood
     parsed from ``<alg_basename>_phyml_stats.txt`` (both inside the job
     directory).  In "raxml" mode, each job whose ``flag`` is "raxml"
     contributes the tree given by its ``-t`` argument and the second
     whitespace-separated token of the first line of
     ``RAxML_log.<-n arg>`` as likelihood.

     Every tree gets "lk" and "model" features.  The model with the
     highest likelihood is written to ``self.best_model_file``; when
     ``self.tree_file`` is set, that model's tree is written there.
     ``ModelTesterTask.finish`` is called at the end.
     """
     lks = []
     if self.lk_mode == "phyml":
         for job in self.jobs:
             if job.flag != "phyml":
                 continue
             tree_file = os.path.join(job.jobdir,
                                      self.alg_basename + "_phyml_tree.txt")
             # Use this job's directory.  The old code read ``j.jobdir``,
             # i.e. the variable of the filtering comprehension, not the
             # job being processed.
             stats_file = os.path.join(
                 job.jobdir, self.alg_basename + "_phyml_stats.txt")
             tree = PhyloTree(tree_file)
             with open(stats_file) as stats:
                 stats_text = stats.read()
             # Raw string: "\s" and "\d" are regex escapes.
             m = re.search(r'Log-likelihood:\s+(-?\d+\.\d+)', stats_text)
             lk = float(m.groups()[0])
             tree.add_feature("lk", lk)
             tree.add_feature("model", job.args["--model"])
             lks.append([float(tree.lk), tree.model, tree])
     elif self.lk_mode == "raxml":
         for job in self.jobs:
             if job.flag != "raxml":
                 continue
             log_path = os.path.join(job.jobdir,
                                     "RAxML_log.%s" % job.args["-n"])
             with open(log_path) as raxml_log:
                 lk = raxml_log.readline().split()[1]
             tree = PhyloTree(job.args["-t"])
             tree.add_feature("lk", lk)
             tree.add_feature("model", job.model)
             # Store the likelihood as a float so entries sort
             # numerically rather than as text.
             lks.append([float(tree.lk), tree.model, tree])

     # Highest likelihood first; sort on the likelihood only so tree
     # objects are never compared.
     lks.sort(key=lambda entry: entry[0], reverse=True)
     # choose the model with higher likelihood
     best_model = lks[0][1]
     best_tree = lks[0][2]
     with open(self.best_model_file, "w") as model_out:
         model_out.write(best_model)
     if self.tree_file:
         # Write the tree of the selected model (not whichever tree the
         # loops above handled last), passing the path as ``outfile``.
         best_tree.write(outfile=self.tree_file)
     ModelTesterTask.finish(self)
Esempio n. 12
0
    def finish(self):
        """Collect per-model likelihoods and store the best model.

        In "phyml" mode, each job whose ``jobcat`` is "bionj" contributes
        the tree in ``<alg_phylip_file>_phyml_tree.txt`` and the
        log-likelihood parsed from ``<alg_phylip_file>_phyml_stats.txt``
        (both joined onto the job directory).  In "raxml" mode, each job
        whose ``jobcat`` is "raxml" contributes the tree given by its
        ``-t`` argument and the second whitespace-separated token of the
        first line of ``RAxML_log.<-n arg>`` as likelihood.

        Every tree gets "lk" and "model" features.  The
        ``[lk, model, tree]`` entries are sorted in ascending order and
        the model of the last entry (highest likelihood) is handed,
        together with the sorted list, to ``ModelTesterTask.store_data``.
        """
        lks = []
        if self.lk_mode == "phyml":
            for job in self.jobs:
                if job.jobcat != "bionj":
                    continue
                tree_file = pjoin(job.jobdir,
                                  self.alg_phylip_file + "_phyml_tree.txt")
                stats_file = pjoin(job.jobdir,
                                   self.alg_phylip_file + "_phyml_stats.txt")
                tree = PhyloTree(tree_file)
                with open(stats_file) as stats:
                    stats_text = stats.read()
                # Raw string: "\s" and "\d" are regex escapes, not
                # Python string escapes.
                m = re.search(r'Log-likelihood:\s+(-?\d+\.\d+)', stats_text)
                lk = float(m.groups()[0])
                tree.add_feature("lk", lk)
                tree.add_feature("model", job.args["--model"])
                lks.append([float(tree.lk), tree.model, tree])
        elif self.lk_mode == "raxml":
            for job in self.jobs:
                if job.jobcat != "raxml":
                    continue
                log_path = pjoin(job.jobdir, "RAxML_log.%s" % job.args["-n"])
                with open(log_path) as raxml_log:
                    lk = raxml_log.readline().split()[1]
                tree = PhyloTree(job.args["-t"])
                tree.add_feature("lk", lk)
                tree.add_feature("model", job.model)
                lks.append([float(tree.lk), tree.model, tree])

        # Sort in ascending order on (lk, model) only, so that ties never
        # fall through to comparing the tree objects themselves.
        lks.sort(key=lambda entry: (entry[0], entry[1]))
        # The highest likelihood is the last entry of the list.
        best_model = lks[-1][1]
        log.log(
            22, "%s model selected from the following lk values:\n%s" %
            (best_model, '\n'.join(map(str, lks))))
        ModelTesterTask.store_data(self, best_model, lks)