def app_wrapper(func, args):
    """Run ``main`` behind an "alive" lock file and report config errors.

    The lock file lives in ``GLOBALS["scratch_dir"]`` when that key is set,
    otherwise in ``GLOBALS["basedir"]``.  If the lock file already exists and
    ``args.clearall`` is false, the environment is cleared, a message is
    written to stderr and the process exits with status -1.

    Parameters:
        func: forwarded untouched to ``main``.
        args: parsed options; ``enable_ui`` and ``clearall`` are read here,
            and the whole object is forwarded to ``main``.

    Side effects:
        * sets the module-level ``NCURSES`` flag to ``False`` when
          ``args.enable_ui`` is false;
        * (re)writes the lock file with the current time;
        * on ``ConfigError``: terminates the background scheduler stored in
          ``GLOBALS['_background_scheduler']`` (if any), prints the error to
          stderr, calls ``clear_env()`` and exits with status -1.
    """
    global NCURSES
    base_dir = GLOBALS.get("scratch_dir", GLOBALS["basedir"])
    lock_file = pjoin(base_dir, "alive")

    if not args.enable_ui:
        NCURSES = False

    if not pexist(lock_file) or args.clearall:
        # Use a context manager so the handle is flushed and closed
        # deterministically instead of relying on garbage collection.
        with open(lock_file, "w") as lock_handle:
            lock_handle.write(time.ctime())
    else:
        clear_env()
        sys.stderr.write(
            '\nThe same process seems to be running. Use --clearall or '
            'remove the lock file "alive" within the output dir\n')
        sys.exit(-1)

    try:
        if NCURSES:
            # curses.wrapper passes the screen object as main's first argument.
            curses.wrapper(main, func, args)
        else:
            main(None, func, args)
    except ConfigError as e:
        scheduler = GLOBALS.get('_background_scheduler', None)
        if scheduler:
            scheduler.terminate()
        sys.stderr.write("\nConfiguration Error: %s\n" % (e,))
        clear_env()
        sys.exit(-1)
def app_wrapper(func, args):
    """Run ``main`` behind an "alive" lock file and report config errors.

    The lock file lives in ``GLOBALS["scratch_dir"]`` when that key is set,
    otherwise in ``GLOBALS["basedir"]``.  If the lock file already exists and
    ``args.clearall`` is false, the environment is cleared, a message is
    written to stderr and the process exits with status -1.

    Parameters:
        func: forwarded untouched to ``main``.
        args: parsed options; ``enable_ui`` and ``clearall`` are read here,
            and the whole object is forwarded to ``main``.

    Side effects:
        * sets the module-level ``NCURSES`` flag to ``False`` when
          ``args.enable_ui`` is false;
        * (re)writes the lock file with the current time;
        * on ``ConfigError``: terminates the background scheduler stored in
          ``GLOBALS['_background_scheduler']`` (if any), prints the error to
          stderr, calls ``clear_env()`` and exits with status -1.
    """
    global NCURSES
    base_dir = GLOBALS.get("scratch_dir", GLOBALS["basedir"])
    lock_file = pjoin(base_dir, "alive")

    if not args.enable_ui:
        NCURSES = False

    if not pexist(lock_file) or args.clearall:
        # Use a context manager so the handle is flushed and closed
        # deterministically instead of relying on garbage collection.
        with open(lock_file, "w") as lock_handle:
            lock_handle.write(time.ctime())
    else:
        clear_env()
        sys.stderr.write(
            '\nThe same process seems to be running. Use --clearall or '
            'remove the lock file "alive" within the output dir\n')
        sys.exit(-1)

    try:
        if NCURSES:
            # curses.wrapper passes the screen object as main's first argument.
            curses.wrapper(main, func, args)
        else:
            main(None, func, args)
    except ConfigError as e:
        scheduler = GLOBALS.get('_background_scheduler', None)
        if scheduler:
            scheduler.terminate()
        sys.stderr.write("\nConfiguration Error: %s\n" % (e,))
        clear_env()
        sys.exit(-1)
def thread_name(task):
    """Build the marked-up display label for ``task``.

    The label is ``task.target_wkname`` when the task has that attribute;
    otherwise it is the ``"_name"`` entry of the ``GLOBALS`` record keyed by
    the task's ``threadid`` (``"?"`` when either lookup misses).

    When ``GLOBALS["verbosity"]`` (default 4) is below 2, labels longer than
    23 characters are shortened to their first and last 10 characters joined
    by ``"..."``.

    Returns the label wrapped as ``"@@13:<label>@@1:"``.
    NOTE(review): the ``@@N:`` markers look like markup consumed by the
    logging/UI layer -- confirm against the logger.
    """
    thread_id = getattr(task, "threadid", None)
    if hasattr(task, 'target_wkname'):
        label = task.target_wkname
    else:
        thread_conf = GLOBALS.get(thread_id, {})
        label = thread_conf.get("_name", "?")

    quiet = GLOBALS.get('verbosity', 4) < 2
    if quiet and len(label) > 23:
        label = "%s...%s" % (label[:10], label[-10:])

    return "@@13:%s@@1:" % label
def finish(self):
    """Root the finished task tree and merge it with the main tree.

    The task tree referenced by ``self.task_tree_file`` is loaded through
    ``db`` and rooted with one of three strategies:

    * main tree present, no outgroup seqs registered for this node: the
      main-tree node whose ``cladeid`` matches is located, the task tree is
      rooted at the partition closest to either child of that node, and the
      node is replaced by the task tree in the main tree;
    * main tree present, outgroup seqs registered: the task tree is rooted
      on those seqs, the outgroup is detached, and the main-tree clade
      spanning ``target_seqs`` is replaced by the matching task subtree;
    * no main tree (first iteration): the tree is rooted at midpoint or at
      the outgroup named in ``GLOBALS["first_split_outgroup"]`` and becomes
      ``self.main_tree``.

    Afterwards every node of the main tree gets a ``cladeid`` feature.

    Attributes set: ``out_seqs``, ``target_seqs``, ``outgroup_match_dist``
    or ``outgroup_match`` (depending on the branch), ``main_tree`` (first
    iteration only), ``pre_iter_task_tree``, ``rf``, ``pre_iter_support``
    and ``task_tree``.

    Raises:
        TaskError: when the target clade is missing from the main tree,
            when the requested outgroup is not monophyletic, or when the
            first-split outgroup specification is invalid.
    """
    def euc_dist(x, y):
        # Symmetric-difference size normalised by the combined set sizes.
        return len(x.symmetric_difference(y)) / float((len(x) + len(y)))

    dataid = db.get_dataid(*self.task_tree_file.split("."))
    ttree = PhyloTree(db.get_data(dataid))
    mtree = self.main_tree
    ttree.dist = 0
    cladeid, target_seqs, out_seqs = db.get_node_info(self.threadid,
                                                      self.nodeid)
    self.out_seqs = out_seqs
    self.target_seqs = target_seqs

    ttree_content = ttree.get_cached_content()
    if mtree and not out_seqs:
        mtree_content = mtree.get_cached_content()
        log.log(24, "Finding best scoring outgroup from previous iteration.")
        # Locate the clade being replaced.  As before, the last matching
        # node wins; a missing match is now reported explicitly instead of
        # surfacing later as an unbound-variable error.
        orig_target = None
        for node in mtree_content:
            if node.cladeid == cladeid:
                orig_target = node
        if orig_target is None:
            raise TaskError(
                self, "Target clade %s could not be found in the main tree"
                % cladeid)

        target_left = set(
            leaf.name for leaf in mtree_content[orig_target.children[0]])
        target_right = set(
            leaf.name for leaf in mtree_content[orig_target.children[1]])

        partition_pairs = []
        everything = set(leaf.name for leaf in ttree_content[ttree])
        for n, content in ttree_content.items():
            if n is ttree:
                continue
            left = set(leaf.name for leaf in content)
            right = everything - left
            d1 = euc_dist(left, target_left)
            d2 = euc_dist(left, target_right)
            best_match = min(d1, d2)
            partition_pairs.append([best_match, left, right, n])

        # Plain list sort kept on purpose: changing the key would change
        # which partition is picked when distances tie.
        partition_pairs.sort()

        self.outgroup_match_dist = partition_pairs[0][0]
        #self.outgroup_match = '#'.join( ['|'.join(partition_pairs[0][1]),
        #                      '|'.join(partition_pairs[0][2])] )

        outgroup = partition_pairs[0][3]
        ttree.set_outgroup(outgroup)

        ttree.dist = orig_target.dist
        ttree.support = orig_target.support

        # Merge task and main trees
        parent = orig_target.up
        orig_target.detach()
        parent.add_child(ttree)

    elif mtree and out_seqs:
        log.log(26, "Rooting tree using %d custom seqs" % len(out_seqs))

        self.outgroup_match = '|'.join(out_seqs)

        #log.log(22, "Out seqs:    %s", len(out_seqs))
        #log.log(22, "Target seqs: %s", target_seqs)
        if len(out_seqs) > 1:
            # first root to a single seq outside the outgroup
            # (should never fail and avoids random outgroup split
            # problems in unrooted trees)
            ttree.set_outgroup(ttree & list(target_seqs)[0])

            # Now tries to get the outgroup node as a monophyletic clade
            outgroup = ttree.get_common_ancestor(out_seqs)
            if set(outgroup.get_leaf_names()) ^ out_seqs:
                msg = "Monophyly of the selected outgroup could not be granted! Probably constrain tree failed."
                #dump_tree_debug(msg, self.taskdir, mtree, ttree, target_seqs, out_seqs)
                raise TaskError(self, msg)
        else:
            outgroup = ttree & list(out_seqs)[0]

        ttree.set_outgroup(outgroup)
        orig_target = self.main_tree.get_common_ancestor(target_seqs)

        # Keep only the subtree spanning the target seqs; drop the outgroup.
        ttree = ttree.get_common_ancestor(target_seqs)
        outgroup.detach()
        self.pre_iter_support = orig_target.support
        # Use previous dist and support
        ttree.dist = orig_target.dist
        ttree.support = orig_target.support
        parent = orig_target.up
        orig_target.detach()
        parent.add_child(ttree)

    else:
        # ROOTS FIRST ITERATION
        log.log(24, "Getting outgroup for first NPR split")

        # if early split is provided in the command line, it
        # overrides config file
        mainout = GLOBALS.get("first_split_outgroup", "midpoint")

        if mainout.lower() == "midpoint":
            log.log(26, "Rooting to midpoint.")
            best_outgroup = ttree.get_midpoint_outgroup()
            if best_outgroup:
                ttree.set_outgroup(best_outgroup)
            else:
                log.warning("Midpoint outgroup could not be set!")
                # Fall back to the first leaf the iterator yields.
                ttree.set_outgroup(next(ttree.iter_leaves()))
        else:
            if mainout.startswith("~"):
                # Lazy defined outgroup. Will trust in the common
                # ancestor of two or more OTUs
                strict_common_ancestor = False
                outs = set(mainout[1:].split())
                if len(outs) < 2:
                    raise TaskError(
                        self, "First split outgroup error: common "
                        "ancestor calculation requires at least two OTU names")
            else:
                strict_common_ancestor = True
                outs = set(mainout.split())

            if outs - target_seqs:
                raise TaskError(
                    self,
                    "Unknown seqs cannot be used to set first split rooting:%s"
                    % (outs - target_seqs))

            if len(outs) > 1:
                # Root first on a seq outside the requested outgroup so the
                # common ancestor below is computed on a sensible rooting.
                anchor = list(set(target_seqs) - outs)[0]
                ttree.set_outgroup(ttree & anchor)
                common = ttree.get_common_ancestor(outs)
                out_seqs = common.get_leaf_names()
                if common is ttree:
                    msg = "First split outgroup could not be granted:%s" % out_seqs
                    #dump_tree_debug(msg, self.taskdir, mtree, ttree, target_seqs, outs)
                    raise TaskError(self, msg)
                if strict_common_ancestor and set(out_seqs) ^ outs:
                    msg = "Monophyly of first split outgroup could not be granted:%s" % out_seqs
                    #dump_tree_debug(msg, self.taskdir, mtree, ttree, target_seqs, outs)
                    raise TaskError(self, msg)
                log.log(26, "@@8:First split rooting to %d seqs@@1:: %s"
                        % (len(out_seqs), out_seqs))
                ttree.set_outgroup(common)
            else:
                single_out = outs.pop()
                ttree.set_outgroup(single_out)
                log.log(26, "@@8:First split rooting to 1 seq@@1:: %s"
                        % (single_out))

        self.main_tree = ttree
        orig_target = ttree

    self.pre_iter_task_tree = orig_target.copy()
    self.rf = orig_target.robinson_foulds(ttree)
    self.pre_iter_support = orig_target.support

    # Reloads node2content of the rooted tree and generate cladeids
    ttree_content = self.main_tree.get_cached_content()
    for n, content in ttree_content.items():
        cid = generate_id([leaf.name for leaf in content])
        n.add_feature("cladeid", cid)

    #ttree.write(outfile=self.pruned_tree)
    self.task_tree = ttree
sys.exit(-1) try: if NCURSES: curses.wrapper(main, func, args) else: main(None, func, args) except ConfigError, e: if GLOBALS.get('_background_scheduler', None): GLOBALS['_background_scheduler'].terminate() print >> sys.stderr, "\nConfiguration Error:", e clear_env() sys.exit(-1) except DataError, e: if GLOBALS.get('_background_scheduler', None): GLOBALS['_background_scheduler'].terminate() print >> sys.stderr, "\nData Error:", e clear_env() sys.exit(-1) except KeyboardInterrupt: # Control-C is also grabbed by the back_launcher, so it is no necessary # to terminate from here print >> sys.stderr, "\nProgram was interrupted." if args.monitor: print >> sys.stderr, ( "VERY IMPORTANT !!!: Note that launched" " jobs will keep running as you provided the --monitor flag") clear_env() sys.exit(-1)
def finish(self):
    """Root the finished task tree and merge it with the main tree.

    The task tree referenced by ``self.task_tree_file`` is loaded through
    ``db`` and rooted with one of three strategies:

    * main tree present, no outgroup seqs registered for this node: the
      main-tree node whose ``cladeid`` matches is located, the task tree is
      rooted at the partition closest to either child of that node, and the
      node is replaced by the task tree in the main tree;
    * main tree present, outgroup seqs registered: the task tree is rooted
      on those seqs, the outgroup is detached, and the main-tree clade
      spanning ``target_seqs`` is replaced by the matching task subtree;
    * no main tree (first iteration): the tree is rooted at midpoint or at
      the outgroup named in ``GLOBALS["first_split_outgroup"]`` and becomes
      ``self.main_tree``.

    Afterwards every node of the main tree gets a ``cladeid`` feature.

    Attributes set: ``out_seqs``, ``target_seqs``, ``outgroup_match_dist``
    or ``outgroup_match`` (depending on the branch), ``main_tree`` (first
    iteration only), ``pre_iter_task_tree``, ``rf``, ``pre_iter_support``
    and ``task_tree``.

    Raises:
        TaskError: when the target clade is missing from the main tree,
            when the requested outgroup is not monophyletic, or when the
            first-split outgroup specification is invalid.
    """
    def euc_dist(x, y):
        # Symmetric-difference size normalised by the combined set sizes.
        return len(x.symmetric_difference(y)) / float((len(x) + len(y)))

    dataid = db.get_dataid(*self.task_tree_file.split("."))
    ttree = PhyloTree(db.get_data(dataid))
    mtree = self.main_tree
    ttree.dist = 0
    cladeid, target_seqs, out_seqs = db.get_node_info(self.threadid,
                                                      self.nodeid)
    self.out_seqs = out_seqs
    self.target_seqs = target_seqs

    ttree_content = ttree.get_cached_content()
    if mtree and not out_seqs:
        mtree_content = mtree.get_cached_content()
        log.log(24, "Finding best scoring outgroup from previous iteration.")
        # Locate the clade being replaced.  As before, the last matching
        # node wins; a missing match is now reported explicitly instead of
        # surfacing later as an unbound-variable error.
        orig_target = None
        for node in mtree_content:
            if node.cladeid == cladeid:
                orig_target = node
        if orig_target is None:
            raise TaskError(
                self, "Target clade %s could not be found in the main tree"
                % cladeid)

        target_left = set(
            leaf.name for leaf in mtree_content[orig_target.children[0]])
        target_right = set(
            leaf.name for leaf in mtree_content[orig_target.children[1]])

        partition_pairs = []
        everything = set(leaf.name for leaf in ttree_content[ttree])
        for n, content in ttree_content.items():
            if n is ttree:
                continue
            left = set(leaf.name for leaf in content)
            right = everything - left
            d1 = euc_dist(left, target_left)
            d2 = euc_dist(left, target_right)
            best_match = min(d1, d2)
            partition_pairs.append([best_match, left, right, n])

        # Plain list sort kept on purpose: changing the key would change
        # which partition is picked when distances tie.
        partition_pairs.sort()

        self.outgroup_match_dist = partition_pairs[0][0]
        #self.outgroup_match = '#'.join( ['|'.join(partition_pairs[0][1]),
        #                      '|'.join(partition_pairs[0][2])] )

        outgroup = partition_pairs[0][3]
        ttree.set_outgroup(outgroup)

        ttree.dist = orig_target.dist
        ttree.support = orig_target.support

        # Merge task and main trees
        parent = orig_target.up
        orig_target.detach()
        parent.add_child(ttree)

    elif mtree and out_seqs:
        log.log(26, "Rooting tree using %d custom seqs" % len(out_seqs))

        self.outgroup_match = '|'.join(out_seqs)

        #log.log(22, "Out seqs:    %s", len(out_seqs))
        #log.log(22, "Target seqs: %s", target_seqs)
        if len(out_seqs) > 1:
            # first root to a single seq outside the outgroup
            # (should never fail and avoids random outgroup split
            # problems in unrooted trees)
            ttree.set_outgroup(ttree & list(target_seqs)[0])

            # Now tries to get the outgroup node as a monophyletic clade
            outgroup = ttree.get_common_ancestor(out_seqs)
            if set(outgroup.get_leaf_names()) ^ out_seqs:
                msg = "Monophyly of the selected outgroup could not be granted! Probably constrain tree failed."
                #dump_tree_debug(msg, self.taskdir, mtree, ttree, target_seqs, out_seqs)
                raise TaskError(self, msg)
        else:
            outgroup = ttree & list(out_seqs)[0]

        ttree.set_outgroup(outgroup)
        orig_target = self.main_tree.get_common_ancestor(target_seqs)

        # Keep only the subtree spanning the target seqs; drop the outgroup.
        ttree = ttree.get_common_ancestor(target_seqs)
        outgroup.detach()
        self.pre_iter_support = orig_target.support
        # Use previous dist and support
        ttree.dist = orig_target.dist
        ttree.support = orig_target.support
        parent = orig_target.up
        orig_target.detach()
        parent.add_child(ttree)

    else:
        # ROOTS FIRST ITERATION
        log.log(24, "Getting outgroup for first NPR split")

        # if early split is provided in the command line, it
        # overrides config file
        mainout = GLOBALS.get("first_split_outgroup", "midpoint")

        if mainout.lower() == "midpoint":
            log.log(26, "Rooting to midpoint.")
            best_outgroup = ttree.get_midpoint_outgroup()
            if best_outgroup:
                ttree.set_outgroup(best_outgroup)
            else:
                log.warning("Midpoint outgroup could not be set!")
                # Fall back to the first leaf the iterator yields.
                ttree.set_outgroup(next(ttree.iter_leaves()))
        else:
            if mainout.startswith("~"):
                # Lazy defined outgroup. Will trust in the common
                # ancestor of two or more OTUs
                strict_common_ancestor = False
                outs = set(mainout[1:].split())
                if len(outs) < 2:
                    raise TaskError(
                        self, "First split outgroup error: common "
                        "ancestor calculation requires at least two OTU names")
            else:
                strict_common_ancestor = True
                outs = set(mainout.split())

            if outs - target_seqs:
                raise TaskError(
                    self,
                    "Unknown seqs cannot be used to set first split rooting:%s"
                    % (outs - target_seqs))

            if len(outs) > 1:
                # Root first on a seq outside the requested outgroup so the
                # common ancestor below is computed on a sensible rooting.
                anchor = list(set(target_seqs) - outs)[0]
                ttree.set_outgroup(ttree & anchor)
                common = ttree.get_common_ancestor(outs)
                out_seqs = common.get_leaf_names()
                if common is ttree:
                    msg = "First split outgroup could not be granted:%s" % out_seqs
                    #dump_tree_debug(msg, self.taskdir, mtree, ttree, target_seqs, outs)
                    raise TaskError(self, msg)
                if strict_common_ancestor and set(out_seqs) ^ outs:
                    msg = "Monophyly of first split outgroup could not be granted:%s" % out_seqs
                    #dump_tree_debug(msg, self.taskdir, mtree, ttree, target_seqs, outs)
                    raise TaskError(self, msg)
                log.log(26, "@@8:First split rooting to %d seqs@@1:: %s"
                        % (len(out_seqs), out_seqs))
                ttree.set_outgroup(common)
            else:
                single_out = outs.pop()
                ttree.set_outgroup(single_out)
                log.log(26, "@@8:First split rooting to 1 seq@@1:: %s"
                        % (single_out))

        self.main_tree = ttree
        orig_target = ttree

    self.pre_iter_task_tree = orig_target.copy()
    self.rf = orig_target.robinson_foulds(ttree)
    self.pre_iter_support = orig_target.support

    # Reloads node2content of the rooted tree and generate cladeids
    ttree_content = self.main_tree.get_cached_content()
    for n, content in ttree_content.items():
        cid = generate_id([leaf.name for leaf in content])
        n.add_feature("cladeid", cid)

    #ttree.write(outfile=self.pruned_tree)
    self.task_tree = ttree
sys.exit(-1) try: if NCURSES: curses.wrapper(main, func, args) else: main(None, func, args) except ConfigError, e: if GLOBALS.get('_background_scheduler', None): GLOBALS['_background_scheduler'].terminate() print >>sys.stderr, "\nConfiguration Error:", e clear_env() sys.exit(-1) except DataError, e: if GLOBALS.get('_background_scheduler', None): GLOBALS['_background_scheduler'].terminate() print >>sys.stderr, "\nData Error:", e clear_env() sys.exit(-1) except KeyboardInterrupt: # Control-C is also grabbed by the back_launcher, so it is no necessary # to terminate from here print >>sys.stderr, "\nProgram was interrupted." if args.monitor: print >>sys.stderr, ("VERY IMPORTANT !!!: Note that launched" " jobs will keep running as you provided the --monitor flag") clear_env() sys.exit(-1) except: