Ejemplo n.º 1
0
 def get_alignment_decomposition_tree(self, p_tree):
     """Return the tree to use for decomposing into alignment subsets.

     When the ``alignment_decomposition_tree`` option is unset, a new
     ``PhylogeneticTree`` built from ``p_tree.den_tree`` is returned.
     Otherwise the option is handed to ``dendropy.Tree`` as a newick
     ``stream`` and the parsed tree is returned, using the taxon set of
     the root problem's subtree.

     Args:
         p_tree: the placement tree; must be a ``PhylogeneticTree``.

     Raises:
         ValueError: if a separate decomposition tree was supplied but
             ``p_tree`` does not have the same number of leaves as the
             root problem's subtree.
     """
     assert isinstance(p_tree, PhylogeneticTree)

     decomposition_source = self.options.alignment_decomposition_tree

     # No separate decomposition tree supplied: work from the placement
     # tree itself.
     if decomposition_source is None:
         return PhylogeneticTree(Tree(p_tree.den_tree))

     # A separate tree is only accepted when the placement tree has as
     # many leaves as the root problem's subtree.
     if p_tree.count_leaves() != self.root_problem.subtree.count_leaves():
         raise ValueError(
             "Alignment decomposition tree can be different from placement "
             "tree only if placement subset size is set to the number of taxa "
             "(i.e. entire tree)"
         )

     _LOG.info("Reading alignment decomposition input tree: %s" %
               (decomposition_source))
     # Reuse the root subtree's taxon set for the newly parsed tree.
     reference_taxa = self.root_problem.subtree.get_tree().taxon_set
     parsed_tree = dendropy.Tree(
         stream=decomposition_source,
         schema="newick",
         preserve_underscores=True,
         taxon_set=reference_taxa)
     return PhylogeneticTree(parsed_tree)
Ejemplo n.º 2
0
    def build_subproblems(self):
        """Build the subproblem hierarchy under the root problem.

        Steps, in order:

        1. Read the alignment and tree, and compute distances when the
           global ``distance`` option is not 1.
        2. Resolve polytomies, label the tree edges and validate the
           configured subset sizes.
        3. Create the root problem, then decompose the tree into
           placement subsets (``P_<key>``).
        4. Decompose every placement subset into alignment subsets
           (``A_<pkey>_<akey>``).
        5. For every leaf of the problem hierarchy, create one
           fragment-chunk problem (``FC_..._<i>``) per fragment chunk
           file.

        Each ``SeppProblem`` is constructed with its parent problem as the
        second argument; presumably the constructor registers the new
        problem as a child of that parent -- confirm in ``SeppProblem``.

        Returns:
            ``self.root_problem``.

        Raises:
            AssertionError: if the tree or alignment has an unexpected
                type, or if a decomposition produces no subsets.
        """
        (alignment, tree) = self.read_alignment_and_tree()

        if options().distance != 1:
            self.compute_distances(alignment)

        assert isinstance(tree, PhylogeneticTree)
        assert isinstance(alignment, MutableAlignment)

        tree.get_tree().resolve_polytomies()
        # Label edges with numbers so that we could assemble things back
        # at the end
        tree.lable_edges()

        # Make sure size values are set, and are meaningful.
        self.check_and_set_sizes(alignment.get_num_taxa())

        self._create_root_problem(tree, alignment)

        # Decompose the tree based on placement subsets.
        # NOTE(review): self.distances is read here even when
        # compute_distances() was skipped above -- confirm it is always
        # initialised elsewhere.
        placement_tree_map = PhylogeneticTree(
            Tree(tree.den_tree)).decompose_tree(
                self.options.placement_size,
                strategy=self.strategy,
                minSize=self.minsubsetsize,
                tree_map={},
                pdistance=1,
                distances=self.distances)
        assert len(placement_tree_map) > 0, ("Tree could not be decomposed"
                " given the following settings; strategy:%s minsubsetsize:%s placement_size:%s"
                % (self.strategy, self.minsubsetsize, self.options.placement_size))
        _LOG.info("Breaking into %d placement subsets." % len(placement_tree_map))

        # For placement subsets create a placement subproblem, and
        # decompose further.
        for (p_key, p_tree) in placement_tree_map.items():
            assert isinstance(p_tree, PhylogeneticTree)
            placement_problem = SeppProblem(p_tree.leaf_node_names(), self.root_problem)
            placement_problem.subtree = p_tree
            placement_problem.label = "P_%s" % str(p_key)
            _LOG.debug("Placement subset %s has %d nodes" % (placement_problem.label, len(p_tree.leaf_node_names())))

            # Further decompose to alignment subsets.
            alignment_tree_map = PhylogeneticTree(
                Tree(p_tree.den_tree)).decompose_tree(
                    self.options.alignment_size,
                    strategy=self.strategy,
                    minSize=self.minsubsetsize,
                    tree_map={},
                    decomp_strategy=self.options.decomp_strategy,
                    pdistance=options().distance,
                    distances=self.distances)
            assert len(alignment_tree_map) > 0, ("Tree could not be decomposed"
            " given the following settings; strategy:%s minsubsetsize:%s alignmet_size:%s"
            % (self.strategy, self.minsubsetsize, self.options.alignment_size))

            _LOG.debug("Placement subset %s has %d alignment subsets: %s" % (placement_problem.label, len(alignment_tree_map.keys()), str(sorted(alignment_tree_map.keys()))))
            _LOG.debug("Placement subset %s has %d taxa:" % (placement_problem.label, sum(len(a_tree.leaf_node_names()) for a_tree in alignment_tree_map.values())))
            for (a_key, a_tree) in alignment_tree_map.items():
                assert isinstance(a_tree, PhylogeneticTree)
                self.modify_tree(a_tree)
                alignment_problem = SeppProblem(a_tree.leaf_node_names(),
                                                placement_problem)
                alignment_problem.subtree = a_tree
                alignment_problem.label = "A_%s_%s" % (str(p_key), str(a_key))

        # Divide fragments into chunks, to help achieve better parallelism.
        fragment_chunk_files = self.create_fragment_files()
        for alignment_problem in self.root_problem.iter_leaves():
            # enumerate() replaces the Python-2-only xrange(len(...)) index
            # loop; it yields the same (index, chunk file) pairs in order.
            for afc, fragment_chunk_file in enumerate(fragment_chunk_files):
                frag_chunk_problem = SeppProblem(alignment_problem.taxa,
                                                 alignment_problem)
                frag_chunk_problem.subtree = alignment_problem.subtree
                frag_chunk_problem.label = alignment_problem.label.replace("A_", "FC_") + "_" + str(afc)
                frag_chunk_problem.fragments = fragment_chunk_file

        # NOTE(review): this count is taken after the fragment-chunk
        # problems were created; if those are now the leaves, the number
        # logged is not the number of alignment subsets -- confirm against
        # iter_leaves().
        _LOG.info("Breaking into %d alignment subsets." % (len(list(self.root_problem.iter_leaves()))))
        _LOG.info("Breaking each alignment subset into %d fragment chunks." % len(fragment_chunk_files))
        _LOG.info("Subproblem structure: %s" % str(self.root_problem))
        return self.root_problem
Ejemplo n.º 3
0
    def launch_alignment(self, context_str=None):
        """Wire up the jobs for this merge, recursing when needed.

        If ``self.tree`` has exactly two nodes, the two matching entries of
        ``self.pasta_team.subsets`` (looked up by node label) become
        ``subjob1`` / ``subjob2`` and are linked to this job as children.

        Otherwise the tree is rerooted at a node of its centroid edge and,
        for every child of the new root, a ``PASTAMergerJob`` is created on
        a copy of the tree in which only that child is left attached to the
        root. The new jobs are stored in ``self.merge_job_list``, launched
        one after another, and ``self._merge_queued_event`` is set.

        Args:
            context_str: stored on ``self.context_str``; ``None`` is stored
                as the empty string.

        Raises:
            RuntimeError: if ``self.killed`` is true when checked.
        """
        if self.killed:
            raise RuntimeError("PastaAligner Job killed")

        self._reset_jobs()
        self.context_str = '' if context_str is None else context_str

        subset_count = self.tree.count_nodes()
        _LOG.debug("Recursive merge on a branch with %d subsets" %
                   (subset_count))
        prefix = "subsets tree: %s" % self.tree.compose_newick()[0:200]

        # Base case: two nodes, so merge the two subsets directly.
        if subset_count == 2:
            pair = self.tree._tree.nodes()
            _LOG.debug("%s ... pairwise merge " % prefix)
            self.skip_merge = False
            self.subjob1 = self.pasta_team.subsets[pair[0].label]
            self.subjob2 = self.pasta_team.subsets[pair[1].label]
            for subjob in (self.subjob1, self.subjob2):
                subjob.add_parent(self)
                self.add_child(subjob)
            return

        _LOG.debug("%s ... recursing further " % prefix)
        self.skip_merge = True

        # Reroot near the centroid edge, preferring its head node unless
        # that node is a leaf.
        centroid = self.tree.get_centroid_edge(spanning=True)
        if centroid.head_node.is_leaf():
            new_root = centroid.tail_node
        else:
            new_root = centroid.head_node
        self.tree._tree.reroot_at_node(new_root, delete_outdegree_one=False)
        _LOG.debug("rerooted to: %s ..." %
                   self.tree.compose_newick()[0:200])

        # One merge job per child of the root: temporarily detach all the
        # other children, copy the tree, then put them back.
        root = self.tree._tree.seed_node
        root_children = root.child_nodes()
        child_jobs = []
        for kept in root_children:
            detached = []
            for other in root_children:
                if other != kept:
                    removal = root.reversible_remove_child(
                        other, suppress_deg_two=False)
                    detached.append(removal)
            subtree = PhylogeneticTree(Tree(self.tree._tree))
            # Reinsert in the opposite order of removal.
            for removal in reversed(detached):
                root.reinsert_nodes(removal)
            _LOG.debug("child = %s ..." % subtree.compose_newick()[0:200])
            child_dataset = self.multilocus_dataset.new_with_shared_meta()

            # A two-node subtree gets a dedicated pairwise temp directory;
            # anything larger uses the base temp directory.
            if subtree.count_nodes() == 2:
                leaf_pair = subtree._tree.nodes()
                parent_tmp_dir = self.get_pairwise_temp_dir(
                    leaf_pair[0].label, leaf_pair[1].label)
            else:
                parent_tmp_dir = self.tmp_base_dir

            job_config = self.configuration()
            child_job = PASTAMergerJob(
                multilocus_dataset=child_dataset,
                pasta_team=self.pasta_team,
                tree=subtree,
                tmp_base_dir=self.tmp_base_dir,
                tmp_dir_par=parent_tmp_dir,
                delete_temps2=False,
                **job_config)
            child_job.add_parent(self)
            self.add_child(child_job)
            child_jobs.append(child_job)

        self.merge_job_list = child_jobs

        # Now launch the new merge jobs, bailing out if killed meanwhile.
        for merge_job in self.merge_job_list:
            if self.killed:
                raise RuntimeError("PastaAligner Job killed")
            merge_job.launch_alignment()

        self._merge_queued_event.set()

        if self.killed:
            raise RuntimeError("PastaAligner Job killed")