def get_alignment_decomposition_tree(self, p_tree):
    """Return the tree to use for decomposing into alignment subsets.

    If no explicit alignment decomposition tree was given on the command
    line, a fresh copy of the placement tree *p_tree* is used. Otherwise
    the user-supplied newick tree is read — which is only valid when the
    placement subset spans the entire tree (same leaf count as the root
    problem's subtree).
    """
    assert isinstance(p_tree, PhylogeneticTree)
    user_tree_src = self.options.alignment_decomposition_tree
    if user_tree_src is None:
        # No separate tree supplied: decompose a copy of the placement tree.
        return PhylogeneticTree(Tree(p_tree.den_tree))
    if p_tree.count_leaves() != self.root_problem.subtree.count_leaves():
        raise ValueError(
            "Alignment decomposition tree can be different from placement tree only if placement subset size is set to the number of taxa (i.e. entire tree)")
    _LOG.info("Reading alignment decomposition input tree: %s" % (
        user_tree_src))
    # Bind the new tree to the same taxon namespace as the placement tree
    # so leaf labels match across the two decompositions.
    return PhylogeneticTree(
        dendropy.Tree(
            stream=user_tree_src,
            schema="newick",
            preserve_underscores=True,
            taxon_set=self.root_problem.subtree.get_tree().taxon_set))
def build_subproblems(self):
    """Decompose the input tree and alignment into the SEPP problem tree.

    Builds a three-level hierarchy under ``self.root_problem``:
    placement subsets (labelled ``P_<key>``), alignment subsets nested
    inside each placement subset (``A_<pkey>_<akey>``), and per-fragment
    chunk problems nested inside each alignment subset
    (``FC_<pkey>_<akey>_<chunk>``).

    Returns:
        The fully populated ``self.root_problem``.
    """
    (alignment, tree) = self.read_alignment_and_tree()
    # Pairwise distances are only needed for distance-based decomposition.
    if options().distance != 1:
        self.compute_distances(alignment)
    assert isinstance(tree, PhylogeneticTree)
    assert isinstance(alignment, MutableAlignment)
    tree.get_tree().resolve_polytomies()
    # Label edges with numbers so that we could assemble things back
    # at the end.  (``lable_edges`` [sic] is the project API name.)
    tree.lable_edges()

    # Make sure size values are set, and are meaningful.
    self.check_and_set_sizes(alignment.get_num_taxa())
    self._create_root_problem(tree, alignment)

    # Decompose the tree based on placement subsets.
    placement_tree_map = PhylogeneticTree(Tree(tree.den_tree)).decompose_tree(
        self.options.placement_size,
        strategy=self.strategy,
        minSize=self.minsubsetsize,
        tree_map={},
        pdistance=1,
        distances=self.distances)
    assert len(placement_tree_map) > 0, (
        "Tree could not be decomposed"
        " given the following settings; strategy:%s minsubsetsize:%s placement_size:%s"
        % (self.strategy, self.minsubsetsize, self.options.placement_size))
    _LOG.info("Breaking into %d placement subsets."
              % len(placement_tree_map))

    # For each placement subset create a placement subproblem,
    # and decompose further into alignment subsets.
    for (p_key, p_tree) in placement_tree_map.items():
        assert isinstance(p_tree, PhylogeneticTree)
        placement_problem = SeppProblem(
            p_tree.leaf_node_names(), self.root_problem)
        placement_problem.subtree = p_tree
        placement_problem.label = "P_%s" % str(p_key)
        _LOG.debug("Placement subset %s has %d nodes"
                   % (placement_problem.label,
                      len(p_tree.leaf_node_names())))
        # Further decompose to alignment subsets.
        alignment_tree_map = PhylogeneticTree(
            Tree(p_tree.den_tree)).decompose_tree(
                self.options.alignment_size,
                strategy=self.strategy,
                minSize=self.minsubsetsize,
                tree_map={},
                decomp_strategy=self.options.decomp_strategy,
                pdistance=options().distance,
                distances=self.distances)
        # NOTE: message typo "alignmet_size" fixed to "alignment_size".
        assert len(alignment_tree_map) > 0, (
            "Tree could not be decomposed"
            " given the following settings; strategy:%s minsubsetsize:%s alignment_size:%s"
            % (self.strategy, self.minsubsetsize, self.options.alignment_size))
        _LOG.debug("Placement subset %s has %d alignment subsets: %s"
                   % (placement_problem.label,
                      len(alignment_tree_map.keys()),
                      str(sorted(alignment_tree_map.keys()))))
        _LOG.debug("Placement subset %s has %d taxa:"
                   % (placement_problem.label,
                      sum([len(a_tree.leaf_node_names())
                           for a_tree in alignment_tree_map.values()])))
        for (a_key, a_tree) in alignment_tree_map.items():
            assert isinstance(a_tree, PhylogeneticTree)
            self.modify_tree(a_tree)
            alignment_problem = SeppProblem(
                a_tree.leaf_node_names(), placement_problem)
            alignment_problem.subtree = a_tree
            alignment_problem.label = "A_%s_%s" % (str(p_key), str(a_key))

    # Divide fragments into chunks, to help achieve better parallelism.
    fragment_chunk_files = self.create_fragment_files()
    for alignment_problem in self.root_problem.iter_leaves():
        # ``enumerate`` replaces the Python-2-only ``xrange(0, len(...))``
        # loop; iteration order and labels are unchanged.
        for afc, chunk_file in enumerate(fragment_chunk_files):
            frag_chunk_problem = SeppProblem(
                alignment_problem.taxa, alignment_problem)
            frag_chunk_problem.subtree = alignment_problem.subtree
            frag_chunk_problem.label = (
                alignment_problem.label.replace("A_", "FC_")
                + "_" + str(afc))
            frag_chunk_problem.fragments = chunk_file

    # NOTE(review): at this point iter_leaves() presumably yields the
    # newly-attached FC_* problems, so this count may be subsets*chunks —
    # preserved from the original; verify against SeppProblem.iter_leaves.
    _LOG.info("Breaking into %d alignment subsets."
              % (len(list(self.root_problem.iter_leaves()))))
    _LOG.info("Breaking each alignment subset into %d fragment chunks."
              % len(fragment_chunk_files))
    _LOG.info("Subproblem structure: %s" % str(self.root_problem))
    return self.root_problem
def launch_alignment(self, context_str=None):
    '''Recursively schedule merge jobs over the subsets tree.

    With exactly two subsets, registers the two existing subset jobs as
    children for a pairwise merge.  With more subsets, reroots the tree
    near its centroid edge, creates one child PASTAMergerJob per subtree
    hanging off the new root, and launches each of them.
    '''
    if self.killed:
        raise RuntimeError("PastaAligner Job killed")
    self._reset_jobs()
    self.context_str = context_str
    if self.context_str is None:
        self.context_str = ''
    node_count = self.tree.count_nodes()
    _LOG.debug("Recursive merge on a branch with %d subsets"
               % (node_count))
    # Truncate the newick string to keep log lines bounded.
    prefix = "subsets tree: %s" % self.tree.compose_newick()[0:200]
    if node_count == 2:
        # Base case: exactly two subsets — wire up a pairwise merge of
        # the two already-existing subset jobs.
        nodes = self.tree._tree.nodes()
        _LOG.debug("%s ... pairwise merge " % prefix)
        self.skip_merge = False
        self.subjob1 = self.pasta_team.subsets[nodes[0].label]
        self.subjob2 = self.pasta_team.subsets[nodes[1].label]
        self.subjob1.add_parent(self)
        self.add_child(self.subjob1)
        self.subjob2.add_parent(self)
        self.add_child(self.subjob2)
    else:
        _LOG.debug("%s ... recursing further " % prefix)
        self.skip_merge = True
        # Reroot near centroid edge
        ce = self.tree.get_centroid_edge(spanning=True)
        # Prefer the internal endpoint of the centroid edge as new root.
        nr = ce.head_node if not ce.head_node.is_leaf() else ce.tail_node
        self.tree._tree.reroot_at_node(nr, delete_outdegree_one=False)
        _LOG.debug("rerooted to: %s ..."
                   % self.tree.compose_newick()[0:200])
        # For each path from root to its children, create a new merge job
        merge_job_list = []
        nr = self.tree._tree.seed_node
        children = nr.child_nodes()
        for keepchild in children:
            # Temporarily detach every sibling of ``keepchild`` so the
            # copied tree contains only the kept subtree, then restore.
            # The detach/copy/reinsert order is load-bearing: remchilds
            # is reversed so nodes are reinserted in the opposite order
            # they were removed.
            remchilds = []
            for remchild in children:
                if remchild != keepchild:
                    remchilds.append(
                        nr.reversible_remove_child(
                            remchild, suppress_deg_two=False))
            # Snapshot taken while siblings are detached.
            t1 = PhylogeneticTree(Tree(self.tree._tree))
            remchilds.reverse()
            for child in remchilds:
                nr.reinsert_nodes(child)
            _LOG.debug("child = %s ..."
                       % t1.compose_newick()[0:200])
            multilocus_dataset1 = \
                self.multilocus_dataset.new_with_shared_meta()
            if t1.count_nodes() == 2:
                # Two-subset child job gets a dedicated pairwise temp dir.
                ns = t1._tree.nodes()
                tmp_dir_par = self.get_pairwise_temp_dir(
                    ns[0].label, ns[1].label)
            else:
                tmp_dir_par = self.tmp_base_dir
            configuration = self.configuration()
            cj = PASTAMergerJob(multilocus_dataset=multilocus_dataset1,
                                pasta_team=self.pasta_team,
                                tree=t1,
                                tmp_base_dir=self.tmp_base_dir,
                                tmp_dir_par=tmp_dir_par,
                                delete_temps2=False,
                                **configuration)
            cj.add_parent(self)
            self.add_child(cj)
            merge_job_list.append(cj)
        self.merge_job_list = merge_job_list
        # now launch these new merge jobs
        for merge_job in self.merge_job_list:
            # Re-check the kill flag before each recursive launch.
            if self.killed:
                raise RuntimeError("PastaAligner Job killed")
            merge_job.launch_alignment()
        self._merge_queued_event.set()
    if self.killed:
        raise RuntimeError("PastaAligner Job killed")
    return