def finish(self):
    """Rank the tested models by likelihood and store the best one.

    Depending on ``self.lk_mode`` the likelihood of each job is read from
    the PhyML stats file ("phyml") or from the first line of the RAxML log
    ("raxml").  One ``[lk, model, tree]`` entry is collected per job; the
    model with the highest likelihood and the full list are handed to
    ``ModelTesterTask.store_data``.

    Raises IndexError if no job produced an entry.
    """
    lks = []
    if self.lk_mode == "phyml":
        for job in self.jobs:
            # In phyml mode only jobs whose category is "bionj" are read.
            if job.jobcat != "bionj":
                continue
            tree_file = pjoin(job.jobdir,
                              self.alg_phylip_file + "_phyml_tree.txt")
            stats_file = pjoin(job.jobdir,
                               self.alg_phylip_file + "_phyml_stats.txt")
            tree = PhyloTree(tree_file)
            # Context manager so the stats file handle is not leaked.
            with open(stats_file) as stats:
                m = re.search(r'Log-likelihood:\s+(-?\d+\.\d+)', stats.read())
            lk = float(m.groups()[0])
            tree.add_feature("lk", lk)
            tree.add_feature("model", job.args["--model"])
            lks.append([float(tree.lk), tree.model, tree])
    elif self.lk_mode == "raxml":
        for job in self.jobs:
            if job.jobcat != "raxml":
                continue
            log_file = pjoin(job.jobdir, "RAxML_log.%s" % job.args["-n"])
            # The likelihood is the second whitespace-separated field of
            # the first log line.
            with open(log_file) as raxml_log:
                lk = raxml_log.readline().split()[1]
            tree = PhyloTree(job.args["-t"])
            tree.add_feature("lk", lk)
            tree.add_feature("model", job.model)
            lks.append([float(tree.lk), tree.model, tree])

    # Sort in ascending order on (lk, model) only, so that ties never fall
    # through to comparing the tree objects themselves.
    lks.sort(key=lambda entry: entry[:2])
    # The model with the highest likelihood is the last one in the list.
    best_model = lks[-1][1]
    log.log(22, "%s model selected from the following lk values:\n%s" %(best_model, '\n'.join(map(str, lks))))
    ModelTesterTask.store_data(self, best_model, lks)
def __init__(self, nodeid, alg_fasta_file, alg_phylip_file,
             constrain_tree, conf, confname):
    """Set up a Prottest model-testing task.

    Registers the PhyML citation, records the alignment files and the
    configuration, builds the base PhyML arguments according to the
    configured ``_lk_mode`` and initialises the underlying
    ``ModelTesterTask``.

    Raises ValueError when ``_lk_mode`` is neither "raxml" nor "phyml".
    """
    GLOBALS["citator"].add(PHYML_CITE)

    self.alg_phylip_file = alg_phylip_file
    self.alg_fasta_file = alg_fasta_file
    self.confname = confname
    self.conf = conf

    section = conf[confname]
    self.lk_mode = section["_lk_mode"]
    if self.lk_mode not in ("raxml", "phyml"):
        raise ValueError("Choose a valid lk_mode value (raxml or phyml)")
    # "n" for raxml mode, "lr" for phyml mode (value of PhyML's -o option).
    optimization = "n" if self.lk_mode == "raxml" else "lr"

    base_args = {
        "--datatype": "aa",
        "--input": self.alg_phylip_file,
        "--bootstrap": "0",
        "-o": optimization,
        # Placeholder: this value is iterated over when creating jobs.
        "--model": None,
        "--quiet": "",
    }

    self.models = section["_models"]
    label = "Prottest-[%s]" % ','.join(self.models)
    ModelTesterTask.__init__(self, nodeid, "mchooser", label,
                             base_args, section)

    self.best_model = None
    self.seqtype = "aa"
    self.init()
def finish(self):
    """Extract the selected model (and tree) from the last job's output.

    The stdout file of ``self.jobs[-1]`` is scanned line by line:

    * after a "Model selected:" line, the value following "=" on the next
      line starting with "partition" is taken as the best model;
    * a line starting with "ML tree (NNI) for the best AIC model =" is
      parsed as a newick tree.

    The best model is written to ``self.best_model_file``; when
    ``self.ttype`` is "tree" the parsed tree is written to
    ``self.tree_file``.  Finally ``ModelTesterTask.finish`` is called.
    """
    tree_prefix = "ML tree (NNI) for the best AIC model ="
    best_model = None
    best_model_in_next_line = False
    t = None

    # Plain "r" instead of "rU": the "U" flag was removed in Python 3.11 and
    # every line is stripped anyway.  The context manager closes the handle.
    with open(self.jobs[-1].stdout_file, "r") as job_output:
        for line in job_output:
            line = line.strip()
            if best_model_in_next_line and line.startswith("Model"):
                # Still waiting for the "partition" line; nothing to read.
                continue
            elif best_model_in_next_line and line.startswith("partition"):
                best_model = line.split("=")[1].strip()
                best_model_in_next_line = False
            elif line.startswith("Model selected:"):
                best_model_in_next_line = True
            elif line.startswith(tree_prefix):
                nw = line.replace(tree_prefix, "")
                t = PhyloTree(nw)

    with open(self.best_model_file, "w") as model_out:
        model_out.write(best_model)
    log.log(26, "Best model: %s" %best_model)

    if self.ttype == "tree":
        t.write(outfile=self.tree_file)

    self.model = best_model
    ModelTesterTask.finish(self)
def __init__(self, nodeid, alg_fasta_file, alg_phylip_file, conf):
    """Set up a jModelTest task.

    Registers the jModelTest citation, chooses the task type from the
    "-t" option of the "jmodeltest" configuration section ("tree" when it
    is "ML" or absent, "mchooser" otherwise), initialises the underlying
    ``ModelTesterTask`` and sets the output file paths.
    """
    GLOBALS["citator"].add(JMODELTEST_CITE)

    self.conf = conf
    base_args = {
        '-d': alg_fasta_file,
    }
    args = self.conf["jmodeltest"]
    if args.get("-t", "ML") == "ML":
        task_type = "tree"
    else:
        task_type = "mchooser"

    # Pass the "jmodeltest" section fetched above; the previous code indexed
    # conf with ``confname``, a name that is not defined in this method.
    ModelTesterTask.__init__(self, nodeid, task_type, "Jmodeltest",
                             base_args, args)

    # set app arguments and options
    self.alg_fasta_file = alg_fasta_file
    self.alg_phylip_file = alg_phylip_file
    self.seqtype = "nt"
    self.models = "see jmodeltest params"
    self.init()

    self.best_model_file = os.path.join(self.taskdir, "best_model.txt")
    if task_type == "tree":
        self.tree_file = os.path.join(self.taskdir, "final_tree.nw")
    else:
        self.tree_file = None
def finish(self):
    """Pick the highest-likelihood model and write the task results.

    Depending on ``self.lk_mode`` the likelihood of each job is read from
    the PhyML stats file (jobs flagged "phyml") or from the first line of
    the RAxML log (jobs flagged "raxml").  The best model name is written
    to ``self.best_model_file`` and, when ``self.tree_file`` is set, the
    corresponding tree is written there.  Finally
    ``ModelTesterTask.finish`` is called.

    Raises IndexError if no job produced an entry.
    """
    lks = []
    if self.lk_mode == "phyml":
        for job in self.jobs:
            if job.flag != "phyml":
                continue
            tree_file = os.path.join(job.jobdir,
                                     self.alg_basename + "_phyml_tree.txt")
            # Use this job's directory (the old code used a stale
            # comprehension variable here).
            stats_file = os.path.join(job.jobdir,
                                      self.alg_basename + "_phyml_stats.txt")
            tree = PhyloTree(tree_file)
            with open(stats_file) as stats:
                m = re.search(r'Log-likelihood:\s+(-?\d+\.\d+)', stats.read())
            lk = float(m.groups()[0])
            tree.add_feature("lk", lk)
            tree.add_feature("model", job.args["--model"])
            lks.append([float(tree.lk), tree.model, tree])
    elif self.lk_mode == "raxml":
        for job in self.jobs:
            if job.flag != "raxml":
                continue
            log_file = os.path.join(job.jobdir,
                                    "RAxML_log.%s" % job.args["-n"])
            # The likelihood is the second whitespace-separated field of
            # the first log line.
            with open(log_file) as raxml_log:
                lk = raxml_log.readline().split()[1]
            tree = PhyloTree(job.args["-t"])
            tree.add_feature("lk", lk)
            tree.add_feature("model", job.model)
            # Rank numerically: the raw value is a string and would
            # otherwise be sorted lexicographically.
            lks.append([float(lk), tree.model, tree])

    # Ascending order on (lk, model) only, so ties never compare trees.
    lks.sort(key=lambda entry: entry[:2])
    # choose the model with higher likelihood: the last one in the list
    best_model = lks[-1][1]
    best_tree = lks[-1][2]

    with open(self.best_model_file, "w") as model_out:
        model_out.write(best_model)
    if self.tree_file:
        # Write the tree of the selected model, not the last one parsed.
        best_tree.write(outfile=self.tree_file)
    ModelTesterTask.finish(self)
def __init__(self, nodeid, alg_fasta_file, alg_phylip_file,
             constrain_tree, conf):
    """Set up a Prottest model-testing task.

    Records the alignment files and configuration, builds the base PhyML
    arguments according to the ``_lk_mode`` value of the "prottest"
    configuration section and initialises the underlying
    ``ModelTesterTask``.

    Raises ValueError when ``_lk_mode`` is neither "raxml" nor "phyml".
    """
    self.alg_phylip_file = alg_phylip_file
    self.alg_fasta_file = alg_fasta_file
    self.alg_basename = basename(self.alg_phylip_file)
    self.conf = conf

    self.lk_mode = conf["prottest"]["_lk_mode"]
    if self.lk_mode not in ("raxml", "phyml"):
        raise ValueError("Choose a valid lk_mode value (raxml or phyml)")
    # "n" for raxml mode, "lr" for phyml mode (value of PhyML's -o option).
    optimization = "n" if self.lk_mode == "raxml" else "lr"

    base_args = {
        "--datatype": "aa",
        "--input": self.alg_basename,
        "--bootstrap": "0",
        "-o": optimization,
        # Placeholder: this value is iterated over when creating jobs.
        "--model": None,
        "--quiet": "",
    }

    ModelTesterTask.__init__(self, nodeid, "mchooser", "Prottest",
                             base_args, conf["prottest"])

    self.best_model = None
    self.seqtype = "aa"
    # Read through self.conf after the base initialiser has run, exactly as
    # before.
    self.models = self.conf["prottest"]["_models"]

    self.init()
    self.post_init()