def app_wrapper(func, args):
    """Run ``main`` under an "alive" lock file and clean up afterwards.

    The lock file lives in ``GLOBALS["scratch_dir"]`` when that key is set,
    otherwise in ``GLOBALS["basedir"]``. If the lock already exists and
    ``args.clearall`` is not set, the function prints a message to stderr
    and exits with status -1.

    Args:
        func: Forwarded untouched to ``main``.
        args: Parsed options. The attributes read here are ``enable_ui``,
            ``clearall`` and ``monitor``.

    Raises:
        SystemExit: With status -1 when the lock file is already present,
            on ``ConfigError``/``DataError``, or on ``KeyboardInterrupt``.
        BaseException: Any other exception raised by ``main`` is re-raised
            after cleanup.
    """
    global NCURSES

    def _stop_background_scheduler():
        # Terminate the background scheduler if one has been registered.
        scheduler = GLOBALS.get('_background_scheduler', None)
        if scheduler:
            scheduler.terminate()

    base_dir = GLOBALS.get("scratch_dir", GLOBALS["basedir"])
    lock_file = pjoin(base_dir, "alive")

    if not args.enable_ui:
        NCURSES = False

    if pexist(lock_file) and not args.clearall:
        clear_env()
        print('\nThe same process seems to be running. Use --clearall or remove the lock file "alive" within the output dir', file=sys.stderr)
        sys.exit(-1)

    # Use a context manager so the lock file is flushed and closed
    # deterministically instead of relying on garbage collection.
    with open(lock_file, "w") as lock_handle:
        lock_handle.write(time.ctime())

    try:
        if NCURSES:
            curses.wrapper(main, func, args)
        else:
            main(None, func, args)
    except ConfigError as e:
        _stop_background_scheduler()
        print("\nConfiguration Error:", e, file=sys.stderr)
        clear_env()
        sys.exit(-1)
    except DataError as e:
        _stop_background_scheduler()
        print("\nData Error:", e, file=sys.stderr)
        clear_env()
        sys.exit(-1)
    except KeyboardInterrupt:
        # Control-C is also grabbed by the back_launcher, so it is not
        # necessary to terminate the scheduler from here.
        print("\nProgram was interrupted.", file=sys.stderr)
        if args.monitor:
            print(("VERY IMPORTANT !!!: Note that launched"
                   " jobs will keep running as you provided the --monitor flag"), file=sys.stderr)
        clear_env()
        sys.exit(-1)
    except BaseException:
        # Catch-all (including SystemExit raised inside main): clean up and
        # let the original exception propagate unchanged.
        _stop_background_scheduler()
        clear_env()
        raise
    else:
        _stop_background_scheduler()
        clear_env()
def process_task(task, wkname, npr_conf, nodeid2info):
    """Build the follow-up tasks for a finished workflow task.

    The branch taken depends on ``task.ttype``:

    * ``"msf"``: stores the constrain tree/alignment, registers the node
      and schedules an aligner task.
    * ``"alg"`` / ``"acleaner"``: records the alignment path, attaches
      identity stats to ``task`` and schedules a cleaner, model tester or
      tree builder task. When the mean amino-acid identity reaches
      ``npr_conf.switch_aa_similarity`` and ``"nt"`` is available, the
      alignment is switched to its codon version first.
    * ``"mchooser"``: schedules a tree builder using ``task.best_model``.
    * ``"tree"``: schedules a tree splitter task.
    * ``"treemerger"``: saves the annotated task tree and, unless the
      iteration limit has been reached, creates ``Msf`` tasks for the
      nodes returned by ``get_next_npr_node``.

    Args:
        task: The finished task. The attributes read depend on its
            ``ttype``.
        wkname: Workflow name; passed to ``IterConfig`` when switching to
            the nucleotide alignment.
        npr_conf: Configuration object exposing the ``aligner``,
            ``alg_cleaner``, ``model_tester``, ``tree_builder`` and
            ``tree_splitter`` ``(conf, class)`` pairs plus
            ``switch_aa_similarity`` and ``max_iters``.
        nodeid2info: Mapping of node id to a per-node info dict. The entry
            for ``task.nodeid`` is read and updated in place.

    Returns:
        list: The newly created tasks (possibly empty).
    """
    alignerconf, alignerclass = npr_conf.aligner
    cleanerconf, cleanerclass = npr_conf.alg_cleaner
    mtesterconf, mtesterclass = npr_conf.model_tester
    treebuilderconf, treebuilderclass = npr_conf.tree_builder
    splitterconf, splitterclass = npr_conf.tree_splitter

    conf = GLOBALS[task.configid]
    seqtype = task.seqtype
    nodeid = task.nodeid
    ttype = task.ttype
    taskid = task.taskid
    threadid = task.threadid
    node_info = nodeid2info[nodeid]
    size = task.size
    target_seqs = node_info.get("target_seqs", [])
    out_seqs = node_info.get("out_seqs", [])

    if not treebuilderclass or size < 4:
        # Allows to dump algs in workflows with no tree tasks or if tree
        # inference does not make sense given the number of sequences.
        # DummyTree will produce a fake fully collapsed newick tree.
        treebuilderclass = DummyTree

    # If more than one outgroup is used, enable the use of constrains
    if out_seqs and len(out_seqs) > 1:
        constrain_id = nodeid
    else:
        constrain_id = None

    new_tasks = []
    if ttype == "msf":
        # Register tree constrains
        constrain_tree = "(%s, (%s));" % (','.join(sorted(task.out_seqs)),
                                          ','.join(sorted(task.target_seqs)))
        _outs = "\n".join([">%s\n0" % name for name in sorted(task.out_seqs)])
        _tars = "\n".join([">%s\n1" % name
                           for name in sorted(task.target_seqs)])
        constrain_alg = '\n'.join([_outs, _tars])
        db.add_task_data(nodeid, DATATYPES.constrain_tree, constrain_tree)
        db.add_task_data(nodeid, DATATYPES.constrain_alg, constrain_alg)
        # The creation of some Task objects may require this info, so it
        # has to be committed right now.
        db.dataconn.commit()

        # Register node
        db.add_node(task.threadid,
                    task.nodeid, task.cladeid,
                    task.target_seqs,
                    task.out_seqs)

        nodeid2info[nodeid]["size"] = task.size
        nodeid2info[nodeid]["target_seqs"] = task.target_seqs
        nodeid2info[nodeid]["out_seqs"] = task.out_seqs
        alg_task = alignerclass(nodeid, task.multiseq_file,
                                seqtype, conf, alignerconf)
        alg_task.size = task.size
        new_tasks.append(alg_task)

    elif ttype == "alg" or ttype == "acleaner":
        if ttype == "alg":
            nodeid2info[nodeid]["alg_path"] = task.alg_fasta_file
        elif ttype == "acleaner":
            nodeid2info[nodeid]["alg_clean_path"] = task.clean_alg_fasta_file

        alg_fasta_file = getattr(task, "clean_alg_fasta_file",
                                 task.alg_fasta_file)
        alg_phylip_file = getattr(task, "clean_alg_phylip_file",
                                  task.alg_phylip_file)

        if seqtype == "aa" and npr_conf.switch_aa_similarity < 1:
            try:
                alg_stats = db.get_task_data(taskid, DATATYPES.alg_stats)
            except Exception:
                # Best effort: when cached stats cannot be loaded they are
                # simply recomputed below.
                alg_stats = {}

            if ttype == "alg":
                algfile = pjoin(GLOBALS["input_dir"], task.alg_phylip_file)
                dataid = DATATYPES.alg_phylip
            elif ttype == "acleaner":
                algfile = pjoin(GLOBALS["input_dir"],
                                task.clean_alg_phylip_file)
                dataid = DATATYPES.clean_alg_phylip

            if "i_mean" not in alg_stats:
                log.log(24, "Calculating alignment stats...")
                # dump data if necessary
                # NOTE(review): this overrides the ttype-specific path
                # chosen above, so for "acleaner" tasks the data selected
                # by `dataid` is dumped under the raw alignment name.
                # Kept as is; confirm whether this is intended.
                algfile = pjoin(GLOBALS["input_dir"], task.alg_phylip_file)
                if not pexist(algfile):
                    # Dump phylip alg. The file is closed before the stats
                    # tool reads it so the content is fully flushed.
                    with open(algfile, "w") as alg_handle:
                        alg_handle.write(
                            db.get_data(db.get_dataid(taskid, dataid)))

                mx, mn, mean, std = get_statal_identity(algfile,
                                                        conf["app"]["statal"])
                alg_stats = {"i_max": mx, "i_mean": mean,
                             "i_min": mn, "i_std": std}
                db.add_task_data(taskid, DATATYPES.alg_stats, alg_stats)

            log.log(22, "Alignment stats (sequence similarity):")
            log.log(22, " max: %(i_max)0.2f, min:%(i_min)0.2f, avg:%(i_mean)0.2f+-%(i_std)0.2f" %
                    (alg_stats))
        else:
            # Stats are not computed in this case; -1 marks them as unset.
            alg_stats = {"i_max": -1, "i_mean": -1, "i_min": -1, "i_std": -1}

        task.max_ident = alg_stats["i_max"]
        task.min_ident = alg_stats["i_min"]
        task.mean_ident = alg_stats["i_mean"]
        task.std_ident = alg_stats["i_std"]
        next_task = None

        if ttype == "alg" and cleanerclass:
            next_task = cleanerclass(nodeid, seqtype, alg_fasta_file,
                                     alg_phylip_file,
                                     conf, cleanerconf)
        else:
            # Converts aa alignment into nt if necessary
            if seqtype == "aa" and \
               "nt" in GLOBALS["seqtypes"] and \
               task.mean_ident >= npr_conf.switch_aa_similarity:
                log.log(28, "@@2:Switching to codon alignment!@@1: amino-acid sequence similarity: %0.2f >= %0.2f" %
                        (task.mean_ident, npr_conf.switch_aa_similarity))
                alg_fasta_file = "%s.%s" % (taskid, DATATYPES.alg_nt_fasta)
                alg_phylip_file = "%s.%s" % (taskid, DATATYPES.alg_nt_phylip)
                try:
                    nt_fasta_id = db.get_dataid(taskid,
                                                DATATYPES.alg_nt_fasta)
                    nt_phylip_id = db.get_dataid(taskid,
                                                 DATATYPES.alg_nt_phylip)
                except ValueError:
                    log.log(22, "Calculating codon alignment...")
                    source_alg = pjoin(GLOBALS["input_dir"],
                                       task.alg_fasta_file)
                    if ttype == "alg":
                        kept_columns = []
                    elif ttype == "acleaner":
                        # if original alignment was trimmed, use it as
                        # reference but make the nt alignment only on the
                        # kept columns
                        kept_columns = db.get_task_data(
                            taskid, DATATYPES.kept_alg_columns)

                    if not pexist(source_alg):
                        with open(source_alg, "w") as source_handle:
                            source_handle.write(
                                db.get_task_data(taskid, DATATYPES.alg_fasta))

                    nt_alg = switch_to_codon(source_alg,
                                             kept_columns=kept_columns)

                    db.add_task_data(taskid, DATATYPES.alg_nt_fasta,
                                     nt_alg.write())
                    db.add_task_data(taskid, DATATYPES.alg_nt_phylip,
                                     nt_alg.write(format='iphylip_relaxed'))
                else:
                    # Both codon alignments already exist: use each id for
                    # its own format. The phylip id used to be assigned to
                    # the fasta variable by mistake.
                    alg_fasta_file = nt_fasta_id
                    alg_phylip_file = nt_phylip_id

                npr_conf = IterConfig(conf, wkname, task.size, "nt")
                seqtype = "nt"

            if mtesterclass:
                next_task = mtesterclass(nodeid, alg_fasta_file,
                                         alg_phylip_file,
                                         constrain_id,
                                         conf, mtesterconf)
            elif treebuilderclass:
                next_task = treebuilderclass(nodeid, alg_phylip_file,
                                             constrain_id,
                                             None, seqtype,
                                             conf, treebuilderconf)
        if next_task:
            next_task.size = task.size
            new_tasks.append(next_task)

    elif ttype == "mchooser":
        if treebuilderclass:
            alg_phylip_file = task.alg_phylip_file
            model = task.best_model
            tree_task = treebuilderclass(nodeid, alg_phylip_file,
                                         constrain_id,
                                         model, seqtype,
                                         conf, treebuilderconf)
            tree_task.size = task.size
            new_tasks.append(tree_task)

    elif ttype == "tree":
        treemerge_task = splitterclass(nodeid, seqtype,
                                       task.tree_file, conf, splitterconf)
        treemerge_task.size = task.size
        new_tasks.append(treemerge_task)

    elif ttype == "treemerger":
        if not task.task_tree:
            task.finish()

        log.log(24, "Saving task tree...")
        annotate_node(task.task_tree, task)
        db.update_node(nid=task.nodeid,
                       runid=task.threadid,
                       newick=db.encode(task.task_tree))
        db.commit()

        # `treebuilderclass` holds a class, not an instance, so the former
        # isinstance() test was always False and the guard never applied.
        # An identity check expresses the intended "not a dummy tree".
        if treebuilderclass is not DummyTree and npr_conf.max_iters > 1:
            current_iter = get_iternumber(threadid)
            if npr_conf.max_iters and current_iter >= npr_conf.max_iters:
                log.warning("Maximum number of iterations reached!")
            else:
                # Add new nodes
                source_seqtype = "aa" if "aa" in GLOBALS["seqtypes"] else "nt"
                ttree, mtree = task.task_tree, task.main_tree

                log.log(26, "Processing tree: %s seqs, %s outgroups",
                        len(target_seqs), len(out_seqs))

                # NOTE(review): the cleaned path is stored above under
                # "alg_clean_path" but looked up here as "clean_alg_path",
                # so within this function the fallback "alg_path" is what
                # gets used. Confirm which key is intended.
                alg_path = node_info.get("clean_alg_path",
                                         node_info["alg_path"])
                for _node, seqs, outs, target_wkname in get_next_npr_node(
                        threadid, ttree, task.out_seqs, mtree, alg_path,
                        npr_conf):
                    log.log(24, "Registering new node: %s seqs, %s outgroups",
                            len(seqs), len(outs))
                    new_task_node = Msf(seqs, outs, seqtype=source_seqtype)
                    new_task_node.target_wkname = target_wkname
                    new_tasks.append(new_task_node)
    return new_tasks
def prevent_sqlite_umask_bug(fname):
    """Create *fname* as an empty file when it does not exist yet.

    Avoids letting the sqlite module create the file itself with default
    644 umask permissions. Bug reference:
    http://www.mail-archive.com/[email protected]/msg59080.html
    """
    if pexist(fname):
        # Never touch (or truncate) a file that is already there.
        return
    with open(fname, "w"):
        pass
def process_task(task, wkname, npr_conf, nodeid2info):
    """Build the follow-up tasks for a finished workflow task.

    The branch taken depends on ``task.ttype``:

    * ``"msf"``: stores the constrain tree/alignment, registers the node
      and schedules an aligner task.
    * ``"alg"`` / ``"acleaner"``: records the alignment path, attaches
      identity stats to ``task`` and schedules a cleaner, model tester or
      tree builder task. When the mean amino-acid identity reaches
      ``npr_conf.switch_aa_similarity`` and ``"nt"`` is available, the
      alignment is switched to its codon version first.
    * ``"mchooser"``: schedules a tree builder using ``task.best_model``.
    * ``"tree"``: schedules a tree splitter task.
    * ``"treemerger"``: saves the annotated task tree and, unless the
      iteration limit has been reached, creates ``Msf`` tasks for the
      nodes returned by ``get_next_npr_node``.

    Args:
        task: The finished task. The attributes read depend on its
            ``ttype``.
        wkname: Workflow name; passed to ``IterConfig`` when switching to
            the nucleotide alignment.
        npr_conf: Configuration object exposing the ``aligner``,
            ``alg_cleaner``, ``model_tester``, ``tree_builder`` and
            ``tree_splitter`` ``(conf, class)`` pairs plus
            ``switch_aa_similarity`` and ``max_iters``.
        nodeid2info: Mapping of node id to a per-node info dict. The entry
            for ``task.nodeid`` is read and updated in place.

    Returns:
        list: The newly created tasks (possibly empty).
    """
    alignerconf, alignerclass = npr_conf.aligner
    cleanerconf, cleanerclass = npr_conf.alg_cleaner
    mtesterconf, mtesterclass = npr_conf.model_tester
    treebuilderconf, treebuilderclass = npr_conf.tree_builder
    splitterconf, splitterclass = npr_conf.tree_splitter

    conf = GLOBALS[task.configid]
    seqtype = task.seqtype
    nodeid = task.nodeid
    ttype = task.ttype
    taskid = task.taskid
    threadid = task.threadid
    node_info = nodeid2info[nodeid]
    size = task.size
    target_seqs = node_info.get("target_seqs", [])
    out_seqs = node_info.get("out_seqs", [])

    if not treebuilderclass or size < 4:
        # Allows to dump algs in workflows with no tree tasks or if tree
        # inference does not make sense given the number of sequences.
        # DummyTree will produce a fake fully collapsed newick tree.
        treebuilderclass = DummyTree

    # If more than one outgroup is used, enable the use of constrains
    if out_seqs and len(out_seqs) > 1:
        constrain_id = nodeid
    else:
        constrain_id = None

    new_tasks = []
    if ttype == "msf":
        # Register tree constrains
        constrain_tree = "(%s, (%s));" % (','.join(sorted(task.out_seqs)),
                                          ','.join(sorted(task.target_seqs)))
        _outs = "\n".join([">%s\n0" % name for name in sorted(task.out_seqs)])
        _tars = "\n".join([">%s\n1" % name
                           for name in sorted(task.target_seqs)])
        constrain_alg = '\n'.join([_outs, _tars])
        db.add_task_data(nodeid, DATATYPES.constrain_tree, constrain_tree)
        db.add_task_data(nodeid, DATATYPES.constrain_alg, constrain_alg)
        # The creation of some Task objects may require this info, so it
        # has to be committed right now.
        db.dataconn.commit()

        # Register node
        db.add_node(task.threadid,
                    task.nodeid, task.cladeid,
                    task.target_seqs,
                    task.out_seqs)

        nodeid2info[nodeid]["size"] = task.size
        nodeid2info[nodeid]["target_seqs"] = task.target_seqs
        nodeid2info[nodeid]["out_seqs"] = task.out_seqs
        alg_task = alignerclass(nodeid, task.multiseq_file,
                                seqtype, conf, alignerconf)
        alg_task.size = task.size
        new_tasks.append(alg_task)

    elif ttype == "alg" or ttype == "acleaner":
        if ttype == "alg":
            nodeid2info[nodeid]["alg_path"] = task.alg_fasta_file
        elif ttype == "acleaner":
            nodeid2info[nodeid]["alg_clean_path"] = task.clean_alg_fasta_file

        alg_fasta_file = getattr(task, "clean_alg_fasta_file",
                                 task.alg_fasta_file)
        alg_phylip_file = getattr(task, "clean_alg_phylip_file",
                                  task.alg_phylip_file)

        if seqtype == "aa" and npr_conf.switch_aa_similarity < 1:
            try:
                alg_stats = db.get_task_data(taskid, DATATYPES.alg_stats)
            except Exception:
                # Best effort: when cached stats cannot be loaded they are
                # simply recomputed below.
                alg_stats = {}

            if ttype == "alg":
                algfile = pjoin(GLOBALS["input_dir"], task.alg_phylip_file)
                dataid = DATATYPES.alg_phylip
            elif ttype == "acleaner":
                algfile = pjoin(GLOBALS["input_dir"],
                                task.clean_alg_phylip_file)
                dataid = DATATYPES.clean_alg_phylip

            if "i_mean" not in alg_stats:
                log.log(24, "Calculating alignment stats...")
                # dump data if necessary
                # NOTE(review): this overrides the ttype-specific path
                # chosen above, so for "acleaner" tasks the data selected
                # by `dataid` is dumped under the raw alignment name.
                # Kept as is; confirm whether this is intended.
                algfile = pjoin(GLOBALS["input_dir"], task.alg_phylip_file)
                if not pexist(algfile):
                    # Dump phylip alg. The file is closed before the stats
                    # tool reads it so the content is fully flushed.
                    with open(algfile, "w") as alg_handle:
                        alg_handle.write(
                            db.get_data(db.get_dataid(taskid, dataid)))

                mx, mn, mean, std = get_statal_identity(algfile,
                                                        conf["app"]["statal"])
                alg_stats = {"i_max": mx, "i_mean": mean,
                             "i_min": mn, "i_std": std}
                db.add_task_data(taskid, DATATYPES.alg_stats, alg_stats)

            log.log(22, "Alignment stats (sequence similarity):")
            log.log(22, " max: %(i_max)0.2f, min:%(i_min)0.2f, avg:%(i_mean)0.2f+-%(i_std)0.2f" %
                    (alg_stats))
        else:
            # Stats are not computed in this case; -1 marks them as unset.
            alg_stats = {"i_max": -1, "i_mean": -1, "i_min": -1, "i_std": -1}

        task.max_ident = alg_stats["i_max"]
        task.min_ident = alg_stats["i_min"]
        task.mean_ident = alg_stats["i_mean"]
        task.std_ident = alg_stats["i_std"]
        next_task = None

        if ttype == "alg" and cleanerclass:
            next_task = cleanerclass(nodeid, seqtype, alg_fasta_file,
                                     alg_phylip_file,
                                     conf, cleanerconf)
        else:
            # Converts aa alignment into nt if necessary
            if seqtype == "aa" and \
               "nt" in GLOBALS["seqtypes"] and \
               task.mean_ident >= npr_conf.switch_aa_similarity:
                log.log(28, "@@2:Switching to codon alignment!@@1: amino-acid sequence similarity: %0.2f >= %0.2f" %
                        (task.mean_ident, npr_conf.switch_aa_similarity))
                alg_fasta_file = "%s.%s" % (taskid, DATATYPES.alg_nt_fasta)
                alg_phylip_file = "%s.%s" % (taskid, DATATYPES.alg_nt_phylip)
                try:
                    nt_fasta_id = db.get_dataid(taskid,
                                                DATATYPES.alg_nt_fasta)
                    nt_phylip_id = db.get_dataid(taskid,
                                                 DATATYPES.alg_nt_phylip)
                except ValueError:
                    log.log(22, "Calculating codon alignment...")
                    source_alg = pjoin(GLOBALS["input_dir"],
                                       task.alg_fasta_file)
                    if ttype == "alg":
                        kept_columns = []
                    elif ttype == "acleaner":
                        # if original alignment was trimmed, use it as
                        # reference but make the nt alignment only on the
                        # kept columns
                        kept_columns = db.get_task_data(
                            taskid, DATATYPES.kept_alg_columns)

                    if not pexist(source_alg):
                        with open(source_alg, "w") as source_handle:
                            source_handle.write(
                                db.get_task_data(taskid, DATATYPES.alg_fasta))

                    nt_alg = switch_to_codon(source_alg,
                                             kept_columns=kept_columns)

                    db.add_task_data(taskid, DATATYPES.alg_nt_fasta,
                                     nt_alg.write())
                    db.add_task_data(taskid, DATATYPES.alg_nt_phylip,
                                     nt_alg.write(format='iphylip_relaxed'))
                else:
                    # Both codon alignments already exist: use each id for
                    # its own format. The phylip id used to be assigned to
                    # the fasta variable by mistake.
                    alg_fasta_file = nt_fasta_id
                    alg_phylip_file = nt_phylip_id

                npr_conf = IterConfig(conf, wkname, task.size, "nt")
                seqtype = "nt"

            if mtesterclass:
                next_task = mtesterclass(nodeid, alg_fasta_file,
                                         alg_phylip_file,
                                         constrain_id,
                                         conf, mtesterconf)
            elif treebuilderclass:
                next_task = treebuilderclass(nodeid, alg_phylip_file,
                                             constrain_id,
                                             None, seqtype,
                                             conf, treebuilderconf)
        if next_task:
            next_task.size = task.size
            new_tasks.append(next_task)

    elif ttype == "mchooser":
        if treebuilderclass:
            alg_phylip_file = task.alg_phylip_file
            model = task.best_model
            tree_task = treebuilderclass(nodeid, alg_phylip_file,
                                         constrain_id,
                                         model, seqtype,
                                         conf, treebuilderconf)
            tree_task.size = task.size
            new_tasks.append(tree_task)

    elif ttype == "tree":
        treemerge_task = splitterclass(nodeid, seqtype,
                                       task.tree_file, conf, splitterconf)
        treemerge_task.size = task.size
        new_tasks.append(treemerge_task)

    elif ttype == "treemerger":
        if not task.task_tree:
            task.finish()

        log.log(24, "Saving task tree...")
        annotate_node(task.task_tree, task)
        db.update_node(nid=task.nodeid,
                       runid=task.threadid,
                       newick=db.encode(task.task_tree))
        db.commit()

        # `treebuilderclass` holds a class, not an instance, so the former
        # isinstance() test was always False and the guard never applied.
        # An identity check expresses the intended "not a dummy tree".
        if treebuilderclass is not DummyTree and npr_conf.max_iters > 1:
            current_iter = get_iternumber(threadid)
            if npr_conf.max_iters and current_iter >= npr_conf.max_iters:
                log.warning("Maximum number of iterations reached!")
            else:
                # Add new nodes
                source_seqtype = "aa" if "aa" in GLOBALS["seqtypes"] else "nt"
                ttree, mtree = task.task_tree, task.main_tree

                log.log(26, "Processing tree: %s seqs, %s outgroups",
                        len(target_seqs), len(out_seqs))

                # NOTE(review): the cleaned path is stored above under
                # "alg_clean_path" but looked up here as "clean_alg_path",
                # so within this function the fallback "alg_path" is what
                # gets used. Confirm which key is intended.
                alg_path = node_info.get("clean_alg_path",
                                         node_info["alg_path"])
                for _node, seqs, outs, target_wkname in get_next_npr_node(
                        threadid, ttree, task.out_seqs, mtree, alg_path,
                        npr_conf):
                    log.log(24, "Registering new node: %s seqs, %s outgroups",
                            len(seqs), len(outs))
                    new_task_node = Msf(seqs, outs, seqtype=source_seqtype)
                    new_task_node.target_wkname = target_wkname
                    new_tasks.append(new_task_node)
    return new_tasks