Python PhylogeneticTree.compose_newickの例

プログラミング言語: Python

名前空間/パッケージ名: pasta.tree

クラス/型: PhylogeneticTree

メソッド/関数: compose_newick

hotexamples.comのコード掲載数: 6

Python PhylogeneticTree.compose_newick - 6件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたPythonのpasta.tree.PhylogeneticTree.compose_newickの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

よく使われるメソッド

表示 / 非表示

PhylogeneticTree(12)

compose_newick(3)

calc_splits(2)

count_nodes(1)

get_centroid_edge(1)

leaf_node_names(1)

sum_brlen(1)

コード例 #1

ファイルを表示

ファイル: pastajob.py プロジェクト: SagesWang/pasta

 def build_subsets_tree(self, curr_tmp_dir_par):
     """Build the tree of alignment subsets and re-key ``self.pasta_team.subsets``.

     The tree is parsed from ``self.tree_str``.  Every leaf starts with a
     one-element set holding its alignment job; internal nodes are then
     labeled from their children and redundant edges are contracted until
     every remaining node carries exactly one job.

     Args:
         curr_tmp_dir_par: Prefix of each job's ``tmp_dir_par``.  The first
             ``len(curr_tmp_dir_par) + 1`` characters are dropped to form
             the subset name (the extra character is presumably the path
             separator -- confirm against the caller).

     Returns:
         A ``PhylogeneticTree`` in which every node has a ``label`` naming
         one subset and every leaf has a fresh ``Taxon`` with that label.

     Raises:
         AssertionError: if a node is left with zero or several jobs.

     Side effects:
         ``self.pasta_team.subsets`` is replaced by a dict that maps each
         subset label to its alignment job.
     """
     translate = {}
     t2 = {}
     for node in self.tree._tree.leaf_iter():
         nalsj = self.pasta_team.subsets[node.taxon.label]
         newname = nalsj.tmp_dir_par[len(curr_tmp_dir_par)+1:]
         translate[node.taxon.label] = newname
         t2[newname] = set([nalsj])
     subsets_tree = PhylogeneticTree(read_newick_with_translate(StringIO(self.tree_str), translate_dict=translate))
     for node in subsets_tree._tree.leaf_iter():
         # NOTE(review): t2 is keyed by subset name, so node.taxon is
         # presumably the translated name left by
         # read_newick_with_translate -- confirm.
         node.alignment_subset_job = t2[node.taxon]
     del t2
     del translate
     _LOG.debug("nodes labeled")
     # Then make sure the tree is rooted at a branch (not at a node).
     if len(subsets_tree._tree.seed_node.child_nodes()) > 2:
         subsets_tree._tree.reroot_at_edge(subsets_tree._tree.seed_node.child_nodes()[0].edge)
     _LOG.debug("Subset Labeling (start):\n%s" %str(subsets_tree.compose_newick()))
     # Then label internal branches based on their children, and collapse redundant edges.
     for node in subsets_tree._tree.postorder_internal_node_iter():
         # My label is the intersection of my children,
         # unless the intersection is empty, in which case it is the union.
         if not hasattr(node, "alignment_subset_job") or node.alignment_subset_job is None:
             node.alignment_subset_job = set.intersection(*[c.alignment_subset_job for c in node.child_nodes()])
             if not node.alignment_subset_job:
                 node.alignment_subset_job = set.union(*[c.alignment_subset_job for c in node.child_nodes()])
         # Now go ahead and prune any child whose label encompasses my label.
         # Use indexing instead of iteration, because with each collapse,
         # new children can be added, and we want to process them as well.
         i = 0
         while i < len(node.child_nodes()):
             c = node.child_nodes()[i]
             if node.alignment_subset_job.issubset(c.alignment_subset_job):
                 # DendroPy does not collapse an edge that leads to a tip; remove the tip instead.
                 if c.child_nodes():
                     c.edge.collapse()
                 else:
                     node.remove_child(c)
             else:
                 # Only advance when nothing was removed at position i.
                 i += 1

     # Now, the remaining edges have multiple labels. These need to
     # be further resolved. Do it by minimum length.
     #   First find all candidate edges that we might want to contract.
     candidate_edges = set()
     for e in subsets_tree._tree.postorder_edge_iter():
         if e.tail_node and e.head_node.alignment_subset_job.intersection(e.tail_node.alignment_subset_job):
             candidate_edges.add( (e.length,e) )
     #   Then sort the edges, and start removing them one by one
     #   only if an edge still has intersecting labels at the two ends.
     #   Sort on the length alone: sorting the raw (length, edge) tuples
     #   falls back to comparing Edge objects on ties and None against
     #   numbers for missing lengths, both of which raise TypeError on
     #   Python 3.  Missing lengths are ordered first.
     candidate_edges = sorted(
         candidate_edges,
         key=lambda pair: float("-inf") if pair[0] is None else pair[0])
     for (el, edge) in candidate_edges:
         I = edge.tail_node.alignment_subset_job.intersection(edge.head_node.alignment_subset_job)
         if I:
             edge.tail_node.alignment_subset_job = I
             if edge.head_node.child_nodes():
                 edge.collapse()
             else:
                 edge.tail_node.remove_child(edge.head_node)
     # Make sure the tree is correct, remove the actual jobs
     # from nodes (can cause deep-copy problems), assign a label to each
     # node, and keep a mapping between the labels and actual alignment job objects.
     self.pasta_team.subsets = {} # Let this now map from subset labels to the actual alignment jobs
     for node in subsets_tree._tree.postorder_node_iter():
         assert len(node.alignment_subset_job) == 1
         nalsj = node.alignment_subset_job.pop()
         node.alignment_subset_job = None
         node.label = nalsj.tmp_dir_par[len(curr_tmp_dir_par)+1:]
         self.pasta_team.subsets[node.label] = nalsj
         if node.is_leaf():
             # Add a dummy taxon, or else dendropy can get confused
             node.taxon = Taxon(label=node.label)
     return subsets_tree

コード例 #2

ファイルを表示

ファイル: pastajob.py プロジェクト: xieduo7/pasta

    def build_subsets_tree(self, curr_tmp_dir_par, build_min_tree=True):
        """Build the subsets (spanning) tree and re-key ``self.pasta_team.subsets``.

        Args:
            curr_tmp_dir_par: Prefix of each job's ``tmp_dir_par``.  The
                first ``len(curr_tmp_dir_par) + 1`` characters are dropped
                to form the subset name.
            build_min_tree: When true (the default) the tree is produced by
                ``build_groups_MST``; otherwise the label
                intersection / edge contraction heuristic is used.

        Returns:
            A ``PhylogeneticTree`` whose node labels are subset names.

        Raises:
            AssertionError: (heuristic path) if a node is left with zero or
                several jobs.
            Exception: (heuristic path) if two nodes end up with the same
                label.

        Side effects:
            ``self.pasta_team.subsets`` is replaced by a dict that maps
            subset names to alignment jobs.
        """
        # Offset at which the subset name starts inside a job's tmp_dir_par.
        name_start = len(curr_tmp_dir_par) + 1

        # uym2 added: add option for MST
        if build_min_tree:
            _LOG.debug("START building Minimum Spanning Tree")
            grouping = {}
            groupName2jobName = {}

            for node in self.tree._tree.leaf_node_iter():
                job = self.pasta_team.subsets[node.taxon.label]
                groupName = job.tmp_dir_par[name_start:]
                # The grouping handed to build_groups_MST uses names
                # without any "/".
                grouping[node.taxon.label] = groupName.replace("/", "")
                groupName2jobName[groupName] = job

            subsets_tree = build_groups_MST(self.tree._tree, grouping)

            # Put a "/" back in front of every "d" so that the labels can
            # match the keys of groupName2jobName again.
            for node in subsets_tree.postorder_node_iter():
                if node.is_leaf():
                    node.taxon.label = node.taxon.label.replace("d", "/d")
                node.label = node.label.replace("d", "/d")

            self.pasta_team.subsets = groupName2jobName
            MST = PhylogeneticTree(subsets_tree)
            _LOG.debug("Spanning tree is:\n %s" % MST)
            return MST

    ###################################

        _LOG.debug("START building heuristic spanning tree")

        translate = {}
        t2 = {}
        for node in self.tree._tree.leaf_node_iter():
            nalsj = self.pasta_team.subsets[node.taxon.label]
            newname = nalsj.tmp_dir_par[name_start:]
            translate[node.taxon.label] = newname
            t2[newname] = set([nalsj])
        subsets_tree = PhylogeneticTree(
            Tree.get(data=self.tree_str, schema='newick'))
        for node in subsets_tree._tree.leaf_node_iter():
            node.alignment_subset_job = t2[translate[node.taxon.label]]
        del t2
        del translate
        _LOG.debug("leafs labeled")
        # Then make sure the tree is rooted at a branch (not at a node).
        if len(subsets_tree._tree.seed_node.child_nodes()) > 2:
            # Prefer the first internal edge below the root; when there is
            # none, c is simply the last child visited.
            for c in subsets_tree._tree.seed_node.child_nodes():
                if c.edge.is_internal():
                    break
            subsets_tree._tree.is_rooted = True
            # Split the edge evenly around the new root.  An edge without a
            # length stays without one instead of raising TypeError.
            half_length = None if c.edge.length is None else c.edge.length / 2.
            subsets_tree._tree.reroot_at_edge(c.edge,
                                              length1=half_length,
                                              length2=half_length,
                                              suppress_unifurcations=False)
        _LOG.debug(
            "Subset Labeling (start):\n%s" %
            str(subsets_tree.compose_newick(suppress_rooting=False))[0:5000])
        # Then label internal branches based on their children, and collapse redundant edges.
        for node in subsets_tree._tree.postorder_internal_node_iter():
            # My label is the intersection of my children,
            # unless the intersection is empty, in which case it is the union.
            if not hasattr(node, "alignment_subset_job"
                           ) or node.alignment_subset_job is None:
                node.alignment_subset_job = set.intersection(
                    *[c.alignment_subset_job for c in node.child_nodes()])
                if not node.alignment_subset_job:
                    node.alignment_subset_job = set.union(
                        *[c.alignment_subset_job for c in node.child_nodes()])
            # Now go ahead and prune any child whose label encompasses my label.
            # Use indexing instead of iteration, because with each collapse,
            # new children can be added, and we want to process them as well.
            i = 0
            while i < len(node.child_nodes()):
                c = node.child_nodes()[i]
                if node.alignment_subset_job.issubset(c.alignment_subset_job):
                    # DendroPy does not collapse an edge that leads to a tip; remove the tip instead.
                    if c.child_nodes():
                        c.edge.collapse()
                    else:
                        node.remove_child(c)
                else:
                    # Only advance when nothing was removed at position i.
                    i += 1

            # Provisional label listing every job still attached to the node.
            node.label = "+".join(nj.tmp_dir_par[name_start:]
                                  for nj in node.alignment_subset_job)
            if node.is_leaf():
                node.taxon = subsets_tree._tree.taxon_namespace.new_taxon(
                    label=node.label)

        _LOG.debug(
            "Before final round, the tree is:\n %s" %
            str(subsets_tree.compose_newick(suppress_rooting=False))[0:5000])
        # Now, the remaining edges have multiple labels. These need to
        # be further resolved. Do it by minimum length.
        #   First find all candidate edges that we might want to contract.
        candidate_edges = set()
        for e in subsets_tree._tree.postorder_edge_iter():
            if e.tail_node and e.head_node.alignment_subset_job.intersection(
                    e.tail_node.alignment_subset_job):
                candidate_edges.add((e.length, e))
        #   Then sort the edges, and start removing them one by one
        #   only if an edge still has intersecting labels at the two ends.
        #   Missing or zero lengths sort first (key -1).
        candidate_edges = sorted(candidate_edges,
                                 key=lambda x: x[0] if x[0] else -1)
        for (el, edge) in candidate_edges:
            I = edge.tail_node.alignment_subset_job.intersection(
                edge.head_node.alignment_subset_job)
            if I:
                edge.tail_node.alignment_subset_job = I
                if edge.head_node.child_nodes():
                    edge.collapse()
                else:
                    edge.tail_node.remove_child(edge.head_node)
        # Make sure the tree is correct, remove the actual jobs
        # from nodes (can cause deep-copy problems), assign a label to each
        # node, and keep a mapping between the labels and actual alignment job objects.
        self.pasta_team.subsets = {
        }  # Let this now map from subset labels to the actual alignment jobs
        for node in subsets_tree._tree.postorder_node_iter():
            assert len(node.alignment_subset_job) == 1
            nalsj = node.alignment_subset_job.pop()
            node.alignment_subset_job = None
            node.label = nalsj.tmp_dir_par[name_start:]  # only the last part of the name
            self.pasta_team.subsets[node.label] = nalsj
            if node.is_leaf():
                # Add a dummy taxon, or else dendropy can get confused
                node.taxon = subsets_tree._tree.taxon_namespace.new_taxon(
                    label=node.label)
        _LOG.debug("Spanning tree is:\n %s" % subsets_tree)
        # Every node must name a distinct subset.
        labels = [nd.label for nd in subsets_tree._tree.postorder_node_iter()]
        if len(set(labels)) != len(labels):
            import collections
            raise Exception("Duplicate names found %s" % "\n".join(
                item for item, count in collections.Counter(labels).items()
                if count > 1))

        return subsets_tree

コード例 #3

ファイルを表示

ファイル: pastajob.py プロジェクト: smirarab/pasta

    def build_subsets_tree(self, curr_tmp_dir_par,build_min_tree=True):
        """Return the tree that connects the alignment subsets.

        With ``build_min_tree`` true (the default) the tree comes from
        ``build_groups_MST``.  Otherwise the tree in ``self.tree_str`` is
        relabeled with alignment jobs and contracted until every node
        carries exactly one job.

        Args:
            curr_tmp_dir_par: Prefix of each job's ``tmp_dir_par``; the
                subset name is whatever follows its first
                ``len(curr_tmp_dir_par) + 1`` characters.
            build_min_tree: Selects the minimum-spanning-tree construction
                instead of the contraction heuristic.

        Returns:
            A ``PhylogeneticTree`` whose node labels are subset names.

        Raises:
            AssertionError: (heuristic path) if a node does not end up with
                exactly one job.
            Exception: (heuristic path) if node labels are not unique.

        Side effects:
            ``self.pasta_team.subsets`` is replaced by a dict that maps
            subset names to alignment jobs.
        """
        # uym2 added: add option for MST
        if build_min_tree:
            _LOG.debug("START building Minimum Spanning Tree")
            grouping = {}
            groupName2jobName = {}

            for node in self.tree._tree.leaf_node_iter():
                groupName = self.pasta_team.subsets[node.taxon.label].tmp_dir_par[len(curr_tmp_dir_par)+1:]
                # build_groups_MST receives group names without "/".
                grouping[node.taxon.label] = groupName.replace("/","")
                groupName2jobName[groupName] = self.pasta_team.subsets[node.taxon.label]

            subsets_tree = build_groups_MST(self.tree._tree,grouping)

            # Re-insert a "/" in front of every "d" so the labels can match
            # the keys of groupName2jobName again.
            for node in subsets_tree.postorder_node_iter():
                if node.is_leaf():
                    node.taxon.label = node.taxon.label.replace("d","/d")
                node.label = node.label.replace("d","/d")

            self.pasta_team.subsets = groupName2jobName
            MST = PhylogeneticTree(subsets_tree)
            _LOG.debug("Spanning tree is:\n %s" %MST)
            return MST
    ###################################


        _LOG.debug("START building heuristic spanning tree")

        translate={}
        t2 = {}
        for node in self.tree._tree.leaf_node_iter():
            nalsj = self.pasta_team.subsets[node.taxon.label]
            newname = nalsj.tmp_dir_par[len(curr_tmp_dir_par)+1:]
            translate[node.taxon.label] = newname
            t2[newname] = set([nalsj])
        subsets_tree = PhylogeneticTree(Tree.get(data=self.tree_str,schema='newick'))
        for node in subsets_tree._tree.leaf_node_iter():
            node.alignment_subset_job = t2[translate[node.taxon.label]]
        del t2
        del translate
        _LOG.debug("leafs labeled")
        # Then make sure the tree is rooted at a branch (not at a node).
        if len(subsets_tree._tree.seed_node.child_nodes()) > 2:
            # Stop at the first internal edge below the root; if there is
            # none, c ends up being the last child.
            for c in subsets_tree._tree.seed_node.child_nodes():
                if c.edge.is_internal():
                    break
            subsets_tree._tree.is_rooted = True
            # Halve the edge length for the two sides of the new root, but
            # tolerate an edge without a length (previously a TypeError).
            if c.edge.length is None:
                half = None
            else:
                half = c.edge.length/2.
            subsets_tree._tree.reroot_at_edge(c.edge,length1=half,
                                              length2=half, suppress_unifurcations=False)
        _LOG.debug("Subset Labeling (start):\n%s" %str(subsets_tree.compose_newick(suppress_rooting=False))[0:5000])
        # Then label internal branches based on their children, and collapse redundant edges.
        for node in subsets_tree._tree.postorder_internal_node_iter():
            # My label is the intersection of my children,
            # unless the intersection is empty, in which case it is the union.
            if not hasattr(node, "alignment_subset_job") or node.alignment_subset_job is None:
                node.alignment_subset_job = set.intersection(*[c.alignment_subset_job for c in node.child_nodes()])
                if not node.alignment_subset_job:
                    node.alignment_subset_job = set.union(*[c.alignment_subset_job for c in node.child_nodes()])
            # Now go ahead and prune any child whose label encompasses my label.
            # Use indexing instead of iteration, because with each collapse,
            # new children can be added, and we want to process them as well.
            i = 0
            while i < len(node.child_nodes()):
                c = node.child_nodes()[i]
                if node.alignment_subset_job.issubset(c.alignment_subset_job):
                    # DendroPy does not collapse an edge that leads to a tip; remove the tip instead.
                    if c.child_nodes():
                        c.edge.collapse()
                    else:
                        node.remove_child(c)
                else:
                    # Only advance when nothing was removed at position i.
                    i += 1

            # Provisional label: the names of all jobs still on this node.
            node.label = "+".join(nj.tmp_dir_par[len(curr_tmp_dir_par)+1:] for nj in node.alignment_subset_job)
            if node.is_leaf():
                node.taxon = subsets_tree._tree.taxon_namespace.new_taxon(label=node.label)

        _LOG.debug("Before final round, the tree is:\n %s" %str(subsets_tree.compose_newick(suppress_rooting=False))[0:5000])
        # Now, the remaining edges have multiple labels. These need to
        # be further resolved. Do it by minimum length.
        #   First find all candidate edges that we might want to contract.
        candidate_edges = set()
        for e in subsets_tree._tree.postorder_edge_iter():
            if e.tail_node and e.head_node.alignment_subset_job.intersection(e.tail_node.alignment_subset_job):
                candidate_edges.add( (e.length,e) )
        #   Then sort the edges, and start removing them one by one
        #   only if an edge still has intersecting labels at the two ends.
        #   Missing or zero lengths sort first (key -1).
        candidate_edges = sorted(candidate_edges, key=lambda x:  x[0] if x[0] else -1)
        for (el, edge) in candidate_edges:
            I = edge.tail_node.alignment_subset_job.intersection(edge.head_node.alignment_subset_job)
            if I:
                edge.tail_node.alignment_subset_job = I
                if edge.head_node.child_nodes():
                    edge.collapse()
                else:
                    edge.tail_node.remove_child(edge.head_node)
        # Make sure the tree is correct, remove the actual jobs
        # from nodes (can cause deep-copy problems), assign a label to each
        # node, and keep a mapping between the labels and actual alignment job objects.
        self.pasta_team.subsets = {} # Let this now map from subset labels to the actual alignment jobs
        for node in subsets_tree._tree.postorder_node_iter():
            assert len(node.alignment_subset_job) == 1
            nalsj = node.alignment_subset_job.pop()
            node.alignment_subset_job = None
            node.label = nalsj.tmp_dir_par[len(curr_tmp_dir_par)+1:]  # only the last part of the name
            self.pasta_team.subsets[node.label] = nalsj
            if node.is_leaf():
                # Add a dummy taxon, or else dendropy can get confused
                node.taxon = subsets_tree._tree.taxon_namespace.new_taxon(label=node.label)
        _LOG.debug("Spanning tree is:\n %s" %subsets_tree)
        # Every node must name a distinct subset.
        labels = [nd.label for nd in subsets_tree._tree.postorder_node_iter()]
        if len(set(labels)) != len(labels):
            import collections
            raise Exception("Duplicate names found %s" %"\n".join
                   (item for item, count in
                    collections.Counter(labels).items() if count > 1))

        return subsets_tree

コード例 #4

ファイルを表示

    def launch_alignment(self, context_str=None):
        """Create the child jobs for this job's subsets tree and launch them.

        When the tree has exactly two nodes, the two matching entries of
        ``self.pasta_team.subsets`` become ``subjob1`` / ``subjob2`` and are
        linked to this job.  Otherwise the tree is rerooted next to its
        centroid edge, one ``PASTAMergerJob`` is created per child of the
        new root, and ``launch_alignment`` is called on each of them.

        Args:
            context_str: Stored on ``self.context_str``; ``None`` is
                replaced by the empty string.

        Raises:
            RuntimeError: if ``self.killed`` is set when it is checked.
        """
        if self.killed:
            raise RuntimeError("PastaAligner Job killed")

        self._reset_jobs()
        self.context_str = '' if context_str is None else context_str
        n_nodes = self.tree.count_nodes()
        _LOG.debug("Recursive merge on a branch with %d subsets" %
                   (n_nodes))
        newick_head = self.tree.compose_newick()[0:200]
        prefix = "subsets tree: %s" % newick_head

        # Base case: only two subsets remain, so they are merged pairwise.
        if n_nodes == 2:
            pair = self.tree._tree.nodes()
            _LOG.debug("%s ... pairwise merge " % prefix)
            self.skip_merge = False
            self.subjob1 = self.pasta_team.subsets[pair[0].label]
            self.subjob2 = self.pasta_team.subsets[pair[1].label]
            for subjob in (self.subjob1, self.subjob2):
                subjob.add_parent(self)
                self.add_child(subjob)
            return

        _LOG.debug("%s ... recursing further " % prefix)
        self.skip_merge = True

        # Reroot near the centroid edge, on its non-leaf end when possible.
        centroid = self.tree.get_centroid_edge(spanning=True)
        if centroid.head_node.is_leaf():
            new_root = centroid.tail_node
        else:
            new_root = centroid.head_node
        self.tree._tree.reroot_at_node(new_root, suppress_unifurcations=False)
        _LOG.debug("rerooted to: %s ..." %
                   self.tree.compose_newick()[0:200])

        # For each path from root to its children, create a new merge job
        root = self.tree._tree.seed_node
        root_children = root.child_nodes()
        spawned = []
        for keepchild in root_children:
            # Temporarily detach every other child, copy what is left,
            # then put the detached children back in reverse order.
            detached = [
                root.reversible_remove_child(other,
                                             suppress_unifurcations=False)
                for other in root_children
                if other != keepchild
            ]
            subtree = PhylogeneticTree(Tree(self.tree._tree))
            for removed in reversed(detached):
                root.reinsert_nodes(removed)
            _LOG.debug("child = %s ..." % subtree.compose_newick()[0:200])
            dataset_copy = self.multilocus_dataset.new_with_shared_meta()

            if subtree.count_nodes() == 2:
                ends = subtree._tree.nodes()
                child_dir = self.get_pairwise_temp_dir(ends[0].label,
                                                       ends[1].label)
            else:
                child_dir = self.tmp_base_dir
            configuration = self.configuration()
            child_job = PASTAMergerJob(multilocus_dataset=dataset_copy,
                                       pasta_team=self.pasta_team,
                                       tree=subtree,
                                       tmp_base_dir=self.tmp_base_dir,
                                       tmp_dir_par=child_dir,
                                       delete_temps2=False,
                                       **configuration)
            child_job.add_parent(self)
            self.add_child(child_job)
            spawned.append(child_job)

        self.merge_job_list = spawned

        # now launch these new merge jobs
        for merge_job in self.merge_job_list:
            if self.killed:
                raise RuntimeError("PastaAligner Job killed")
            merge_job.launch_alignment()

        self._merge_queued_event.set()

        if self.killed:
            raise RuntimeError("PastaAligner Job killed")
        return

コード例 #5

ファイルを表示

    def build_subsets_tree(self, curr_tmp_dir_par):
        """Return the subsets tree and re-key ``self.pasta_team.subsets``.

        The tree is read from ``self.tree_str`` with leaf names translated
        to subset names.  Leaves start with a one-element set holding their
        alignment job, internal nodes are labeled from their children, and
        edges are contracted until each node carries exactly one job.

        Args:
            curr_tmp_dir_par: Prefix of each job's ``tmp_dir_par``; the
                subset name is whatever follows its first
                ``len(curr_tmp_dir_par) + 1`` characters.

        Returns:
            A ``PhylogeneticTree`` whose nodes are labeled with subset
            names; leaves also receive a new ``Taxon`` with that label.

        Raises:
            AssertionError: if a node does not end up with exactly one job.

        Side effects:
            ``self.pasta_team.subsets`` is replaced by a dict that maps
            subset labels to alignment jobs.
        """
        translate = {}
        t2 = {}
        for node in self.tree._tree.leaf_iter():
            nalsj = self.pasta_team.subsets[node.taxon.label]
            newname = nalsj.tmp_dir_par[len(curr_tmp_dir_par) + 1:]
            translate[node.taxon.label] = newname
            t2[newname] = set([nalsj])
        subsets_tree = PhylogeneticTree(
            read_newick_with_translate(StringIO(self.tree_str),
                                       translate_dict=translate))
        for node in subsets_tree._tree.leaf_iter():
            # NOTE(review): t2 is keyed by subset name, so node.taxon is
            # presumably the translated name -- confirm against
            # read_newick_with_translate.
            node.alignment_subset_job = t2[node.taxon]
        del t2
        del translate
        _LOG.debug("nodes labeled")
        # Then make sure the tree is rooted at a branch (not at a node).
        if len(subsets_tree._tree.seed_node.child_nodes()) > 2:
            subsets_tree._tree.reroot_at_edge(
                subsets_tree._tree.seed_node.child_nodes()[0].edge)
        _LOG.debug("Subset Labeling (start):\n%s" %
                   str(subsets_tree.compose_newick()))
        # Then label internal branches based on their children, and collapse redundant edges.
        for node in subsets_tree._tree.postorder_internal_node_iter():
            # My label is the intersection of my children,
            # unless the intersection is empty, in which case it is the union.
            if not hasattr(node, "alignment_subset_job"
                           ) or node.alignment_subset_job is None:
                node.alignment_subset_job = set.intersection(
                    *[c.alignment_subset_job for c in node.child_nodes()])
                if not node.alignment_subset_job:
                    node.alignment_subset_job = set.union(
                        *[c.alignment_subset_job for c in node.child_nodes()])
            # Now go ahead and prune any child whose label encompasses my label.
            # Use indexing instead of iteration, because with each collapse,
            # new children can be added, and we want to process them as well.
            i = 0
            while i < len(node.child_nodes()):
                c = node.child_nodes()[i]
                if node.alignment_subset_job.issubset(c.alignment_subset_job):
                    # DendroPy does not collapse an edge that leads to a tip; remove the tip instead.
                    if c.child_nodes():
                        c.edge.collapse()
                    else:
                        node.remove_child(c)
                else:
                    # Only advance when nothing was removed at position i.
                    i += 1

        # Now, the remaining edges have multiple labels. These need to
        # be further resolved. Do it by minimum length.
        #   First find all candidate edges that we might want to contract.
        candidate_edges = set()
        for e in subsets_tree._tree.postorder_edge_iter():
            if e.tail_node and e.head_node.alignment_subset_job.intersection(
                    e.tail_node.alignment_subset_job):
                candidate_edges.add((e.length, e))
        #   Then sort the edges, and start removing them one by one
        #   only if an edge still has intersecting labels at the two ends.
        #   The key looks at the length only.  Sorting the bare
        #   (length, edge) tuples compares Edge objects whenever two
        #   lengths tie and compares None with numbers when a length is
        #   missing; both raise TypeError on Python 3.  Edges without a
        #   length are ordered first.
        candidate_edges = sorted(
            candidate_edges,
            key=lambda pair: float("-inf") if pair[0] is None else pair[0])
        for (el, edge) in candidate_edges:
            I = edge.tail_node.alignment_subset_job.intersection(
                edge.head_node.alignment_subset_job)
            if I:
                edge.tail_node.alignment_subset_job = I
                if edge.head_node.child_nodes():
                    edge.collapse()
                else:
                    edge.tail_node.remove_child(edge.head_node)
        # Make sure the tree is correct, remove the actual jobs
        # from nodes (can cause deep-copy problems), assign a label to each
        # node, and keep a mapping between the labels and actual alignment job objects.
        self.pasta_team.subsets = {
        }  # Let this now map from subset labels to the actual alignment jobs
        for node in subsets_tree._tree.postorder_node_iter():
            assert len(node.alignment_subset_job) == 1
            nalsj = node.alignment_subset_job.pop()
            node.alignment_subset_job = None
            node.label = nalsj.tmp_dir_par[len(curr_tmp_dir_par) + 1:]
            self.pasta_team.subsets[node.label] = nalsj
            if node.is_leaf():
                # Add a dummy taxon, or else dendropy can get confused
                node.taxon = Taxon(label=node.label)
        return subsets_tree

コード例 #6

ファイルを表示

ファイル: pastaalignerjob.py プロジェクト: SagesWang/pasta

    def launch_alignment(self, context_str=None):
        """Set up and launch the merge jobs for this job's subsets tree.

        A two-node tree is the base case: the two subset jobs found in
        ``self.pasta_team.subsets`` are stored as ``subjob1`` / ``subjob2``
        and linked to this job.  For a larger tree the tree is rerooted
        next to its centroid edge, a ``PASTAMergerJob`` is built for each
        child of the new root, and each of those jobs is launched in turn.

        Args:
            context_str: Stored on ``self.context_str``; ``None`` becomes
                the empty string.

        Raises:
            RuntimeError: if ``self.killed`` is set when it is checked.
        """
        if self.killed:
            raise RuntimeError("PastaAligner Job killed")

        self._reset_jobs()
        self.context_str = context_str if context_str is not None else ''
        subset_total = self.tree.count_nodes()
        _LOG.debug("Recursive merge on a branch with %d subsets" % (subset_total))
        tree_preview = self.tree.compose_newick()[0:200]
        prefix = "subsets tree: %s" % tree_preview

        if subset_total == 2:
            # Only two subsets left: merge them directly.
            both = self.tree._tree.nodes()
            _LOG.debug("%s ... pairwise merge " % prefix)
            self.skip_merge = False
            self.subjob1 = self.pasta_team.subsets[both[0].label]
            self.subjob2 = self.pasta_team.subsets[both[1].label]
            for leaf_job in (self.subjob1, self.subjob2):
                leaf_job.add_parent(self)
                self.add_child(leaf_job)
            return

        _LOG.debug("%s ... recursing further " % prefix)
        self.skip_merge = True

        # Reroot near centroid edge, preferring its non-leaf end.
        mid_edge = self.tree.get_centroid_edge(spanning=True)
        if mid_edge.head_node.is_leaf():
            pivot = mid_edge.tail_node
        else:
            pivot = mid_edge.head_node
        self.tree._tree.reroot_at_node(pivot, delete_outdegree_one=False)
        _LOG.debug("rerooted to: %s ..." % self.tree.compose_newick()[0:200])

        # For each path from root to its children, create a new merge job
        seed = self.tree._tree.seed_node
        branches = seed.child_nodes()
        created_jobs = []
        for keepchild in branches:
            # Detach the sibling branches, snapshot the remaining tree,
            # then re-attach the siblings in reverse order of removal.
            pruned = [
                seed.reversible_remove_child(sibling, suppress_deg_two=False)
                for sibling in branches
                if sibling != keepchild
            ]
            branch_tree = PhylogeneticTree(Tree(self.tree._tree))
            for record in reversed(pruned):
                seed.reinsert_nodes(record)
            _LOG.debug("child = %s ..." % branch_tree.compose_newick()[0:200])
            branch_dataset = self.multilocus_dataset.new_with_shared_meta()

            if branch_tree.count_nodes() == 2:
                tips = branch_tree._tree.nodes()
                work_dir = self.get_pairwise_temp_dir(tips[0].label, tips[1].label)
            else:
                work_dir = self.tmp_base_dir
            configuration = self.configuration()
            merger = PASTAMergerJob(multilocus_dataset=branch_dataset,
                                    pasta_team=self.pasta_team,
                                    tree=branch_tree,
                                    tmp_base_dir=self.tmp_base_dir,
                                    tmp_dir_par=work_dir,
                                    delete_temps2=False,
                                    **configuration)
            merger.add_parent(self)
            self.add_child(merger)
            created_jobs.append(merger)

        self.merge_job_list = created_jobs

        # now launch these new merge jobs
        for merge_job in self.merge_job_list:
            if self.killed:
                raise RuntimeError("PastaAligner Job killed")
            merge_job.launch_alignment()

        self._merge_queued_event.set()

        if self.killed:
            raise RuntimeError("PastaAligner Job killed")
        return