Esempio n. 1
0
    def finish(self):
        def euc_dist(x, y):
            return len(x.symmetric_difference(y)) / float((len(x) + len(y)))

        dataid = db.get_dataid(*self.task_tree_file.split("."))
        ttree = PhyloTree(db.get_data(dataid))
        mtree = self.main_tree
        ttree.dist = 0
        cladeid, target_seqs, out_seqs = db.get_node_info(
            self.threadid, self.nodeid)
        self.out_seqs = out_seqs
        self.target_seqs = target_seqs

        ttree_content = ttree.get_cached_content()
        if mtree and not out_seqs:
            mtree_content = mtree.get_cached_content()
            log.log(24,
                    "Finding best scoring outgroup from previous iteration.")
            for _n in mtree_content:
                if _n.cladeid == cladeid:
                    orig_target = _n
            target_left = set(
                [_n.name for _n in mtree_content[orig_target.children[0]]])
            target_right = set(
                [_n.name for _n in mtree_content[orig_target.children[1]]])

            partition_pairs = []
            everything = set([_n.name for _n in ttree_content[ttree]])
            for n, content in ttree_content.iteritems():
                if n is ttree:
                    continue
                left = set([_n.name for _n in content])
                right = everything - left
                d1 = euc_dist(left, target_left)
                d2 = euc_dist(left, target_right)
                best_match = min(d1, d2)
                partition_pairs.append([best_match, left, right, n])

            partition_pairs.sort()

            self.outgroup_match_dist = partition_pairs[0][0]
            #self.outgroup_match = '#'.join( ['|'.join(partition_pairs[0][1]),
            #                      '|'.join(partition_pairs[0][2])] )

            outgroup = partition_pairs[0][3]
            ttree.set_outgroup(outgroup)

            ttree.dist = orig_target.dist
            ttree.support = orig_target.support

            # Merge task and main trees
            parent = orig_target.up
            orig_target.detach()
            parent.add_child(ttree)

        elif mtree and out_seqs:
            log.log(26, "Rooting tree using %d custom seqs" % len(out_seqs))

            self.outgroup_match = '|'.join(out_seqs)

            #log.log(22, "Out seqs:    %s", len(out_seqs))
            #log.log(22, "Target seqs: %s", target_seqs)
            if len(out_seqs) > 1:
                #first root to a single seqs outside the outgroup
                #(should never fail and avoids random outgroup split
                #problems in unrooted trees)
                ttree.set_outgroup(ttree & list(target_seqs)[0])
                # Now tries to get the outgroup node as a monophyletic clade
                outgroup = ttree.get_common_ancestor(out_seqs)
                if set(outgroup.get_leaf_names()) ^ out_seqs:
                    msg = "Monophyly of the selected outgroup could not be granted! Probably constrain tree failed."
                    #dump_tree_debug(msg, self.taskdir, mtree, ttree, target_seqs, out_seqs)
                    raise TaskError(self, msg)
            else:
                outgroup = ttree & list(out_seqs)[0]

            ttree.set_outgroup(outgroup)
            orig_target = self.main_tree.get_common_ancestor(target_seqs)
            found_target = outgroup.get_sisters()[0]

            ttree = ttree.get_common_ancestor(target_seqs)
            outgroup.detach()
            self.pre_iter_support = orig_target.support
            # Use previous dist and support
            ttree.dist = orig_target.dist
            ttree.support = orig_target.support
            parent = orig_target.up
            orig_target.detach()
            parent.add_child(ttree)

        else:
            # ROOTS FIRST ITERATION
            log.log(24, "Getting outgroup for first NPR split")

            # if early split is provided in the command line, it
            # overrides config file
            mainout = GLOBALS.get("first_split_outgroup", "midpoint")

            if mainout.lower() == "midpoint":
                log.log(26, "Rooting to midpoint.")
                best_outgroup = ttree.get_midpoint_outgroup()
                if best_outgroup:
                    ttree.set_outgroup(best_outgroup)
                else:
                    log.warning("Midpoint outgroup could not be set!")
                    ttree.set_outgroup(ttree.iter_leaves().next())
            else:
                if mainout.startswith("~"):
                    # Lazy defined outgroup. Will trust in the common
                    # ancestor of two or more OTUs
                    strict_common_ancestor = False
                    outs = set(mainout[1:].split())
                    if len(outs) < 2:
                        raise TaskError(
                            self, "First split outgroup error: common "
                            "ancestor calculation requires at least two OTU names"
                        )
                else:
                    strict_common_ancestor = True
                    outs = set(mainout.split())

                if outs - target_seqs:
                    raise TaskError(
                        self,
                        "Unknown seqs cannot be used to set first split rooting:%s"
                        % (outs - target_seqs))

                if len(outs) > 1:
                    anchor = list(set(target_seqs) - outs)[0]
                    ttree.set_outgroup(ttree & anchor)
                    common = ttree.get_common_ancestor(outs)
                    out_seqs = common.get_leaf_names()
                    if common is ttree:
                        msg = "First split outgroup could not be granted:%s" % out_seqs
                        #dump_tree_debug(msg, self.taskdir, mtree, ttree, target_seqs, outs)
                        raise TaskError(self, msg)
                    if strict_common_ancestor and set(out_seqs) ^ outs:
                        msg = "Monophyly of first split outgroup could not be granted:%s" % out_seqs
                        #dump_tree_debug(msg, self.taskdir, mtree, ttree, target_seqs, outs)
                        raise TaskError(self, msg)

                    log.log(
                        26, "@@8:First split rooting to %d seqs@@1:: %s" %
                        (len(out_seqs), out_seqs))
                    ttree.set_outgroup(common)
                else:
                    single_out = outs.pop()
                    common = ttree.set_outgroup(single_out)
                    log.log(
                        26, "@@8:First split rooting to 1 seq@@1:: %s" %
                        (single_out))

            self.main_tree = ttree
            orig_target = ttree

        tn = orig_target.copy()
        self.pre_iter_task_tree = tn
        self.rf = orig_target.robinson_foulds(ttree)
        self.pre_iter_support = orig_target.support

        # Reloads node2content of the rooted tree and generate cladeids
        ttree_content = self.main_tree.get_cached_content()
        for n, content in ttree_content.iteritems():
            cid = generate_id([_n.name for _n in content])
            n.add_feature("cladeid", cid)

        #ttree.write(outfile=self.pruned_tree)
        self.task_tree = ttree
Esempio n. 2
0
def process_task(task, wkname, npr_conf, nodeid2info):
    cogconf, cogclass = npr_conf.cog_selector
    concatconf, concatclass = npr_conf.alg_concatenator
    treebuilderconf, treebuilderclass = npr_conf.tree_builder
    splitterconf, splitterclass = npr_conf.tree_splitter

    threadid, nodeid, seqtype, ttype = (task.threadid, task.nodeid, task.seqtype, task.ttype)
    cladeid, targets, outgroups = db.get_node_info(threadid, nodeid)

    if not treebuilderclass or task.size < 4:
        # Allows to dump algs in workflows with no tree tasks or if tree
        # inference does not make sense given the number of sequences. DummyTree
        # will produce a fake fully collapsed newick tree.
        treebuilderclass = DummyTree

    if outgroups and len(outgroups) > 1:
        constrain_id = nodeid
    else:
        constrain_id = None

    node_info = nodeid2info[nodeid]
    conf = GLOBALS[task.configid]
    new_tasks = []
    if ttype == "cog_selector":

        # Generates a md5 id based on the genetree configuration workflow used
        # for the concat alg task. If something changes, concat alg will change
        # and the associated tree will be rebuilt
        config_blocks = set([wkname])
        for key, value in conf[wkname].iteritems():
            if isinstance(value, list) or isinstance(value, tuple) or isinstance(value, set):
                for elem in value:
                    config_blocks.add(elem[1:]) if isinstance(elem, str) and elem.startswith("@") else None
            elif isinstance(value, str):
                config_blocks.add(value[1:]) if value.startswith("@") else None
        config_checksum = md5("".join(["[%s]\n%s" % (x, dict_string(conf[x])) for x in sorted(config_blocks)]))

        # THIS PART HAS BEEN MOVED TO COG_SELECTOR TASK
        # Check that current selection of cogs will cover all target and
        # outgroup species
        # cog_hard_limit = int(conf[concatconf]["_max_cogs"])
        # sp_repr = defaultdict(int)
        # for co in task.raw_cogs[:cog_hard_limit]:
        #    for sp, seq in co:
        #        sp_repr[sp] += 1
        # missing_sp = (targets | outgroups) - set(sp_repr.keys())
        # if missing_sp:
        #    raise TaskError("missing species under current cog selection: %s" %missing_sp)
        # else:
        #    log.log(28, "Analysis of current COG selection:")
        #    for sp, ncogs in sorted(sp_repr.items(), key=lambda x:x[1]):
        #        log.log(28, "   % 30s species present in % 6d COGs" %(sp, ncogs))

        # register concat alignment task. NodeId associated to concat_alg tasks
        # and all its children jobs should take into account cog information and
        # not only species and outgroups included.

        concat_job = concatclass(task.cogs, seqtype, conf, concatconf, config_checksum)
        db.add_node(threadid, concat_job.nodeid, cladeid, targets, outgroups)

        # Register Tree constrains
        constrain_tree = "(%s, (%s));" % (",".join(sorted(outgroups)), ",".join(sorted(targets)))
        _outs = "\n".join(map(lambda name: ">%s\n0" % name, sorted(outgroups)))
        _tars = "\n".join(map(lambda name: ">%s\n1" % name, sorted(targets)))
        constrain_alg = "\n".join([_outs, _tars])
        db.add_task_data(concat_job.nodeid, DATATYPES.constrain_tree, constrain_tree)
        db.add_task_data(concat_job.nodeid, DATATYPES.constrain_alg, constrain_alg)
        db.dataconn.commit()  # since the creation of some Task objects
        # may require this info, I need to commit
        # right now.
        concat_job.size = task.size
        new_tasks.append(concat_job)

    elif ttype == "concat_alg":
        # register tree for concat alignment, using constraint tree if
        # necessary
        alg_id = db.get_dataid(task.taskid, DATATYPES.concat_alg_phylip)
        try:
            parts_id = db.get_dataid(task.taskid, DATATYPES.model_partitions)
        except ValueError:
            parts_id = None

        nodeid2info[nodeid]["size"] = task.size
        nodeid2info[nodeid]["target_seqs"] = targets
        nodeid2info[nodeid]["out_seqs"] = outgroups

        tree_task = treebuilderclass(
            nodeid, alg_id, constrain_id, None, seqtype, conf, treebuilderconf, parts_id=parts_id
        )
        tree_task.size = task.size
        new_tasks.append(tree_task)

    elif ttype == "tree":
        merger_task = splitterclass(nodeid, seqtype, task.tree_file, conf, splitterconf)
        merger_task.size = task.size
        new_tasks.append(merger_task)

    elif ttype == "treemerger":
        # Lets merge with main tree
        if not task.task_tree:
            task.finish()

        log.log(24, "Saving task tree...")
        annotate_node(task.task_tree, task)
        db.update_node(nid=task.nodeid, runid=task.threadid, newick=db.encode(task.task_tree))
        db.commit()

        if not isinstance(treebuilderclass, DummyTree) and npr_conf.max_iters > 1:
            current_iter = get_iternumber(threadid)
            if npr_conf.max_iters and current_iter >= npr_conf.max_iters:
                log.warning("Maximum number of iterations reached!")
            else:
                # Add new nodes
                source_seqtype = "aa" if "aa" in GLOBALS["seqtypes"] else "nt"
                ttree, mtree = task.task_tree, task.main_tree

                log.log(26, "Processing tree: %s seqs, %s outgroups", len(targets), len(outgroups))

                target_cladeids = None
                if tobool(conf[splitterconf].get("_find_ncbi_targets", False)):
                    tcopy = mtree.copy()
                    ncbi.connect_database()
                    tax2name, tax2track = ncbi.annotate_tree_with_taxa(tcopy, None)
                    # tax2name, tax2track = ncbi.annotate_tree_with_taxa(tcopy, "fake") # for testing sptree example
                    n2content = tcopy.get_cached_content()
                    broken_branches, broken_clades, broken_clade_sizes, tax2name = ncbi.get_broken_branches(
                        tcopy, n2content
                    )
                    log.log(
                        28,
                        "restricting NPR to broken clades: "
                        + colorify(", ".join(map(lambda x: "%s" % tax2name[x], broken_clades)), "wr"),
                    )
                    target_cladeids = set()
                    for branch in broken_branches:
                        print branch.get_ascii(attributes=["spname", "taxid"], compact=True)
                        print map(lambda x: "%s" % tax2name[x], broken_branches[branch])
                        target_cladeids.add(branch.cladeid)

                for node, seqs, outs, wkname in get_next_npr_node(
                    task.configid, ttree, task.out_seqs, mtree, None, npr_conf, target_cladeids
                ):  # None is to avoid alg checks
                    log.log(24, "Adding new node: %s seqs, %s outgroups", len(seqs), len(outs))
                    new_task_node = cogclass(seqs, outs, source_seqtype, conf, cogconf)
                    new_task_node.target_wkname = wkname
                    new_tasks.append(new_task_node)
                    db.add_node(
                        threadid,
                        new_task_node.nodeid,
                        new_task_node.cladeid,
                        new_task_node.targets,
                        new_task_node.outgroups,
                    )
    return new_tasks
Esempio n. 3
0
def process_task(task, wkname, npr_conf, nodeid2info):
    cogconf, cogclass = npr_conf.cog_selector
    concatconf, concatclass = npr_conf.alg_concatenator
    treebuilderconf, treebuilderclass = npr_conf.tree_builder
    splitterconf, splitterclass = npr_conf.tree_splitter

    threadid, nodeid, seqtype, ttype = (task.threadid, task.nodeid,
                                        task.seqtype, task.ttype)
    cladeid, targets, outgroups = db.get_node_info(threadid, nodeid)

    if not treebuilderclass or task.size < 4:
        # Allows to dump algs in workflows with no tree tasks or if tree
        # inference does not make sense given the number of sequences. DummyTree
        # will produce a fake fully collapsed newick tree.
        treebuilderclass = DummyTree

    if outgroups and len(outgroups) > 1:
        constrain_id = nodeid
    else:
        constrain_id = None

    node_info = nodeid2info[nodeid]
    conf = GLOBALS[task.configid]
    new_tasks = []
    if ttype == "cog_selector":

        # Generates a md5 id based on the genetree configuration workflow used
        # for the concat alg task. If something changes, concat alg will change
        # and the associated tree will be rebuilt
        config_blocks = set([wkname])
        for key, value in conf[wkname].iteritems():
            if isinstance(value, list) or  isinstance(value, tuple) \
                    or isinstance(value, set):
                for elem in value:
                    config_blocks.add(elem[1:]) if isinstance(
                        elem, str) and elem.startswith("@") else None
            elif isinstance(value, str):
                config_blocks.add(value[1:]) if value.startswith("@") else None
        config_checksum = md5(''.join([
            "[%s]\n%s" % (x, dict_string(conf[x]))
            for x in sorted(config_blocks)
        ]))

        # THIS PART HAS BEEN MOVED TO COG_SELECTOR TASK
        # Check that current selection of cogs will cover all target and
        # outgroup species
        #cog_hard_limit = int(conf[concatconf]["_max_cogs"])
        #sp_repr = defaultdict(int)
        #for co in task.raw_cogs[:cog_hard_limit]:
        #    for sp, seq in co:
        #        sp_repr[sp] += 1
        #missing_sp = (targets | outgroups) - set(sp_repr.keys())
        #if missing_sp:
        #    raise TaskError("missing species under current cog selection: %s" %missing_sp)
        #else:
        #    log.log(28, "Analysis of current COG selection:")
        #    for sp, ncogs in sorted(sp_repr.items(), key=lambda x:x[1]):
        #        log.log(28, "   % 30s species present in % 6d COGs" %(sp, ncogs))

        # register concat alignment task. NodeId associated to concat_alg tasks
        # and all its children jobs should take into account cog information and
        # not only species and outgroups included.

        concat_job = concatclass(task.cogs, seqtype, conf, concatconf,
                                 config_checksum)
        db.add_node(threadid, concat_job.nodeid, cladeid, targets, outgroups)

        # Register Tree constrains
        constrain_tree = "(%s, (%s));" % (','.join(
            sorted(outgroups)), ','.join(sorted(targets)))
        _outs = "\n".join(map(lambda name: ">%s\n0" % name, sorted(outgroups)))
        _tars = "\n".join(map(lambda name: ">%s\n1" % name, sorted(targets)))
        constrain_alg = '\n'.join([_outs, _tars])
        db.add_task_data(concat_job.nodeid, DATATYPES.constrain_tree,
                         constrain_tree)
        db.add_task_data(concat_job.nodeid, DATATYPES.constrain_alg,
                         constrain_alg)
        db.dataconn.commit()  # since the creation of some Task objects
        # may require this info, I need to commit
        # right now.
        concat_job.size = task.size
        new_tasks.append(concat_job)

    elif ttype == "concat_alg":
        # register tree for concat alignment, using constraint tree if
        # necessary
        alg_id = db.get_dataid(task.taskid, DATATYPES.concat_alg_phylip)
        try:
            parts_id = db.get_dataid(task.taskid, DATATYPES.model_partitions)
        except ValueError:
            parts_id = None

        nodeid2info[nodeid]["size"] = task.size
        nodeid2info[nodeid]["target_seqs"] = targets
        nodeid2info[nodeid]["out_seqs"] = outgroups

        tree_task = treebuilderclass(nodeid,
                                     alg_id,
                                     constrain_id,
                                     None,
                                     seqtype,
                                     conf,
                                     treebuilderconf,
                                     parts_id=parts_id)
        tree_task.size = task.size
        new_tasks.append(tree_task)

    elif ttype == "tree":
        merger_task = splitterclass(nodeid, seqtype, task.tree_file, conf,
                                    splitterconf)
        merger_task.size = task.size
        new_tasks.append(merger_task)

    elif ttype == "treemerger":
        # Lets merge with main tree
        if not task.task_tree:
            task.finish()

        log.log(24, "Saving task tree...")
        annotate_node(task.task_tree, task)
        db.update_node(nid=task.nodeid,
                       runid=task.threadid,
                       newick=db.encode(task.task_tree))
        db.commit()

        if not isinstance(treebuilderclass,
                          DummyTree) and npr_conf.max_iters > 1:
            current_iter = get_iternumber(threadid)
            if npr_conf.max_iters and current_iter >= npr_conf.max_iters:
                log.warning("Maximum number of iterations reached!")
            else:
                # Add new nodes
                source_seqtype = "aa" if "aa" in GLOBALS["seqtypes"] else "nt"
                ttree, mtree = task.task_tree, task.main_tree

                log.log(26, "Processing tree: %s seqs, %s outgroups",
                        len(targets), len(outgroups))

                target_cladeids = None
                if tobool(conf[splitterconf].get("_find_ncbi_targets", False)):
                    tcopy = mtree.copy()
                    ncbi.connect_database()
                    tax2name, tax2track = ncbi.annotate_tree_with_taxa(
                        tcopy, None)
                    #tax2name, tax2track = ncbi.annotate_tree_with_taxa(tcopy, "fake") # for testing sptree example
                    n2content = tcopy.get_cached_content()
                    broken_branches, broken_clades, broken_clade_sizes, tax2name = ncbi.get_broken_branches(
                        tcopy, n2content)
                    log.log(
                        28, 'restricting NPR to broken clades: ' + colorify(
                            ', '.join(
                                map(lambda x: "%s" % tax2name[x],
                                    broken_clades)), "wr"))
                    target_cladeids = set()
                    for branch in broken_branches:
                        print branch.get_ascii(attributes=['spname', 'taxid'],
                                               compact=True)
                        print map(lambda x: "%s" % tax2name[x],
                                  broken_branches[branch])
                        target_cladeids.add(branch.cladeid)

                for node, seqs, outs, wkname in get_next_npr_node(
                        task.configid, ttree, task.out_seqs, mtree, None,
                        npr_conf,
                        target_cladeids):  # None is to avoid alg checks
                    log.log(24, "Adding new node: %s seqs, %s outgroups",
                            len(seqs), len(outs))
                    new_task_node = cogclass(seqs, outs, source_seqtype, conf,
                                             cogconf)
                    new_task_node.target_wkname = wkname
                    new_tasks.append(new_task_node)
                    db.add_node(threadid, new_task_node.nodeid,
                                new_task_node.cladeid, new_task_node.targets,
                                new_task_node.outgroups)
    return new_tasks
Esempio n. 4
0
File: merger.py Progetto: a1an77/ete
    def finish(self):
        def euc_dist(x, y):
            return len(x.symmetric_difference(y)) / float((len(x) + len(y)))
        dataid = db.get_dataid(*self.task_tree_file.split("."))
        ttree = PhyloTree(db.get_data(dataid))
        mtree = self.main_tree
        ttree.dist = 0
        cladeid, target_seqs, out_seqs = db.get_node_info(self.threadid, self.nodeid)
        self.out_seqs = out_seqs
        self.target_seqs = target_seqs

        ttree_content = ttree.get_cached_content()
        if mtree and not out_seqs:
            mtree_content = mtree.get_cached_content()
            log.log(24, "Finding best scoring outgroup from previous iteration.")
            for _n in mtree_content:
                if _n.cladeid == cladeid:
                    orig_target = _n 
            target_left = set([_n.name for _n in mtree_content[orig_target.children[0]]])
            target_right = set([_n.name for _n in mtree_content[orig_target.children[1]]])
                    
            partition_pairs = []
            everything = set([_n.name for _n in ttree_content[ttree]])
            for n, content in ttree_content.iteritems():
                if n is ttree:
                    continue
                left = set([_n.name for _n in content])
                right =  everything - left
                d1 = euc_dist(left, target_left)
                d2 = euc_dist(left, target_right)
                best_match = min(d1, d2)
                partition_pairs.append([best_match, left, right, n])

            partition_pairs.sort()
            
            self.outgroup_match_dist = partition_pairs[0][0]
            #self.outgroup_match = '#'.join( ['|'.join(partition_pairs[0][1]),
            #                      '|'.join(partition_pairs[0][2])] )

            
            outgroup = partition_pairs[0][3]
            ttree.set_outgroup(outgroup)
      
            ttree.dist = orig_target.dist
            ttree.support = orig_target.support

            # Merge task and main trees
            parent = orig_target.up
            orig_target.detach()
            parent.add_child(ttree)

        elif mtree and out_seqs:
            log.log(26, "Rooting tree using %d custom seqs" %
                   len(out_seqs))

            self.outgroup_match = '|'.join(out_seqs)
                        
            #log.log(22, "Out seqs:    %s", len(out_seqs))
            #log.log(22, "Target seqs: %s", target_seqs)
            if len(out_seqs) > 1:
                #first root to a single seqs outside the outgroup
                #(should never fail and avoids random outgroup split
                #problems in unrooted trees)
                ttree.set_outgroup(ttree & list(target_seqs)[0])
                # Now tries to get the outgroup node as a monophyletic clade
                outgroup = ttree.get_common_ancestor(out_seqs)
                if set(outgroup.get_leaf_names()) ^ out_seqs:
                    msg = "Monophyly of the selected outgroup could not be granted! Probably constrain tree failed."
                    #dump_tree_debug(msg, self.taskdir, mtree, ttree, target_seqs, out_seqs)
                    raise TaskError(self, msg)
            else:
                outgroup = ttree & list(out_seqs)[0]

            ttree.set_outgroup(outgroup)
            orig_target = self.main_tree.get_common_ancestor(target_seqs)
            found_target = outgroup.get_sisters()[0]

            ttree = ttree.get_common_ancestor(target_seqs)
            outgroup.detach()
            self.pre_iter_support = orig_target.support
            # Use previous dist and support
            ttree.dist = orig_target.dist
            ttree.support = orig_target.support
            parent = orig_target.up
            orig_target.detach()
            parent.add_child(ttree)
               
        else:
            # ROOTS FIRST ITERATION
            log.log(24, "Getting outgroup for first NPR split")
            
            # if early split is provided in the command line, it
            # overrides config file
            mainout = GLOBALS.get("first_split_outgroup", "midpoint")
            
            if mainout.lower() == "midpoint":
                log.log(26, "Rooting to midpoint.")
                best_outgroup = ttree.get_midpoint_outgroup()
                if best_outgroup:
                    ttree.set_outgroup(best_outgroup)
                else:
                    log.warning("Midpoint outgroup could not be set!")
                    ttree.set_outgroup(ttree.iter_leaves().next())
            else:
                if mainout.startswith("~"):
                    # Lazy defined outgroup. Will trust in the common
                    # ancestor of two or more OTUs
                    strict_common_ancestor = False
                    outs = set(mainout[1:].split())
                    if len(outs) < 2:          
                        raise TaskError(self, "First split outgroup error: common "
                                        "ancestor calculation requires at least two OTU names")
                else:
                    strict_common_ancestor = True
                    outs = set(mainout.split())

                if outs - target_seqs:
                    raise TaskError(self, "Unknown seqs cannot be used to set first split rooting:%s" %(outs - target_seqs))
                    
                if len(outs) > 1:
                    anchor = list(set(target_seqs) - outs)[0]
                    ttree.set_outgroup(ttree & anchor)
                    common = ttree.get_common_ancestor(outs)
                    out_seqs = common.get_leaf_names()
                    if common is ttree:
                        msg = "First split outgroup could not be granted:%s" %out_seqs
                        #dump_tree_debug(msg, self.taskdir, mtree, ttree, target_seqs, outs)
                        raise TaskError(self, msg)
                    if strict_common_ancestor and set(out_seqs) ^ outs:
                        msg = "Monophyly of first split outgroup could not be granted:%s" %out_seqs
                        #dump_tree_debug(msg, self.taskdir, mtree, ttree, target_seqs, outs)
                        raise TaskError(self, msg)
                    
                    log.log(26, "@@8:First split rooting to %d seqs@@1:: %s" %(len(out_seqs),out_seqs))
                    ttree.set_outgroup(common)
                else:
                    single_out = outs.pop()
                    common = ttree.set_outgroup(single_out)
                    log.log(26, "@@8:First split rooting to 1 seq@@1:: %s" %(single_out))
                    
            self.main_tree = ttree
            orig_target = ttree

        tn = orig_target.copy()
        self.pre_iter_task_tree = tn
        self.rf = orig_target.robinson_foulds(ttree)
        self.pre_iter_support = orig_target.support
                
        # Reloads node2content of the rooted tree and generate cladeids
        ttree_content = self.main_tree.get_cached_content()
        for n, content in ttree_content.iteritems():
            cid = generate_id([_n.name for _n in content])
            n.add_feature("cladeid", cid)

        #ttree.write(outfile=self.pruned_tree)
        self.task_tree = ttree