def get_alignment_decomposition_tree(self, p_tree):
    """Return the tree that alignment decomposition should operate on.

    Args:
        p_tree: the placement tree, as a PhylogeneticTree.

    Returns:
        A PhylogeneticTree. When ``self.options.alignment_decomposition_tree``
        is None it is a new PhylogeneticTree built from ``p_tree.den_tree``;
        otherwise it wraps the Newick tree read from that option.

    Raises:
        ValueError: a separate decomposition tree was supplied but ``p_tree``
            does not have as many leaves as the root problem's subtree.
    """
    assert isinstance(p_tree, PhylogeneticTree)

    # No dedicated decomposition tree was given: fall back on the placement
    # tree itself.
    if self.options.alignment_decomposition_tree is None:
        return PhylogeneticTree(Tree(p_tree.den_tree))

    # A dedicated tree is only accepted when the placement subset has the
    # same number of leaves as the root problem's subtree.
    if p_tree.count_leaves() != self.root_problem.subtree.count_leaves():
        raise ValueError(
            "Alignment decomposition tree can be different from placement "
            "tree only if placement subset size is set to the number of "
            "taxa (i.e. entire tree)")

    _LOG.info("Reading alignment decomposition input tree: %s"
              % (self.options.alignment_decomposition_tree))
    # The root problem's taxon set is passed to the parser.
    # NOTE(review): presumably so both trees share taxon objects - confirm.
    return PhylogeneticTree(
        dendropy.Tree.get_from_stream(
            self.options.alignment_decomposition_tree,
            schema="newick",
            preserve_underscores=True,
            taxon_set=self.root_problem.subtree.get_tree().taxon_set))
def read_alignment_and_tree(self):
    """Load the input alignment and the input tree named in the options.

    Returns:
        A 2-tuple ``(alignment, tree)``: a MutableAlignment filled from
        ``self.options.alignment_file`` and a PhylogeneticTree wrapping the
        Newick tree parsed from ``self.options.tree_file``.
    """
    _LOG.info("Reading input alignment: %s" % (self.options.alignment_file))
    input_alignment = MutableAlignment()
    input_alignment.read_file_object(self.options.alignment_file)

    _LOG.info("Reading input tree: %s" % self.options.tree_file)
    # Underscores in taxon labels are kept as-is rather than being turned
    # into spaces by the Newick parser.
    newick_tree = dendropy.Tree.get_from_stream(
        self.options.tree_file,
        schema="newick",
        preserve_underscores=True)
    return (input_alignment, PhylogeneticTree(newick_tree))
def build_subproblems(self):
    """Build the nested subproblem hierarchy below ``self.root_problem``.

    Reads the input alignment and tree, creates the root problem, splits the
    tree into placement subsets, splits each placement subset into alignment
    subsets, and finally hangs one fragment-chunk problem per fragment chunk
    file under every alignment problem.

    Returns:
        ``self.root_problem``.

    Raises:
        AssertionError: the inputs have unexpected types, or one of the
            decompositions produced no subsets.
    """
    (input_alignment, input_tree) = self.read_alignment_and_tree()

    # Distances are only computed when the configured distance is not 1.
    if options().distance != 1:
        self.compute_distances(input_alignment)

    assert isinstance(input_tree, PhylogeneticTree)
    assert isinstance(input_alignment, MutableAlignment)

    input_tree.get_tree().resolve_polytomies()
    # Label edges with numbers so that we could assemble things back
    # at the end
    input_tree.lable_edges()

    # Make sure size values are set, and are meaningful.
    self.check_and_set_sizes(input_alignment.get_num_taxa())

    self._create_root_problem(input_tree, input_alignment)

    # Decompose the tree based on placement subsets. The decomposition runs
    # on a fresh PhylogeneticTree built from the input tree's den_tree.
    # NOTE(review): on Python 3 the `/` below is true division, so minSize
    # may be a float - confirm decompose_tree is fine with that.
    placement_source = PhylogeneticTree(Tree(input_tree.den_tree))
    placement_subtrees = placement_source.decompose_tree(
        self.options.placement_size,
        strategy=self.strategy,
        minSize=self.options.placement_size / int(
            self.options.exhaustive.placementminsubsetsizefacotr),
        tree_map={},
        pdistance=1,
        decomp_strategy=self.decomp_strategy,
        distances=self.distances,
        maxDiam=None)
    # NOTE(review): the message reports self.minsubsetsize although the
    # call above was given a different minSize - confirm which is intended.
    assert len(placement_subtrees) > 0, (
        "Tree could not be decomposed"
        " given the following settings; strategy:%s minsubsetsize:%s"
        " placement_size:%s"
        % (self.strategy, self.minsubsetsize, self.options.placement_size))
    _LOG.info("Breaking into %d placement subsets."
              % len(placement_subtrees))

    # For placement subsets create a placement subproblem, and decompose
    # further
    for (placement_key, placement_tree) in placement_subtrees.items():
        assert isinstance(placement_tree, PhylogeneticTree)
        placement_problem = SeppProblem(placement_tree.leaf_node_names(),
                                        self.root_problem)
        placement_problem.subtree = placement_tree
        placement_problem.label = "P_%s" % str(placement_key)
        _LOG.debug(
            "Placement subset %s has %d nodes"
            % (placement_problem.label,
               len(placement_tree.leaf_node_names())))

        # Further decompose to alignment subsets
        alignment_source = PhylogeneticTree(Tree(placement_tree.den_tree))
        alignment_subtrees = alignment_source.decompose_tree(
            self.options.alignment_size,
            strategy=self.strategy,
            minSize=self.minsubsetsize,
            tree_map={},
            decomp_strategy=self.options.decomp_strategy,
            pdistance=options().distance,
            distances=self.distances,
            maxDiam=self.options.maxDiam)
        assert len(alignment_subtrees) > 0, (
            "Tree could not be decomposed"
            " given the following settings; strategy:%s"
            " minsubsetsize:%s alignmet_size:%s"
            % (self.strategy, self.minsubsetsize,
               self.options.alignment_size))

        _LOG.debug("Placement subset %s has %d alignment subsets: %s"
                   % (placement_problem.label,
                      len(alignment_subtrees),
                      str(sorted(alignment_subtrees.keys()))))
        _LOG.debug("Placement subset %s has %d taxa:"
                   % (placement_problem.label,
                      sum(len(subtree.leaf_node_names())
                          for subtree in alignment_subtrees.values())))

        for (alignment_key, alignment_tree) in alignment_subtrees.items():
            assert isinstance(alignment_tree, PhylogeneticTree)
            self.modify_tree(alignment_tree)
            alignment_problem = SeppProblem(
                alignment_tree.leaf_node_names(), placement_problem)
            alignment_problem.subtree = alignment_tree
            alignment_problem.label = "A_%s_%s" % (str(placement_key),
                                                   str(alignment_key))

    # Logged before any fragment-chunk problems are created below.
    _LOG.info("Breaking into %d alignment subsets."
              % (len(list(self.root_problem.iter_leaves()))))

    # Divide fragments into chunks, to help achieve better parallelism
    chunk_files = self.create_fragment_files()
    self.root_problem.fragment_chunks = len(chunk_files)
    for alignment_problem in self.root_problem.iter_leaves():
        for chunk_index in range(self.root_problem.fragment_chunks):
            chunk_problem = SeppProblem(alignment_problem.taxa,
                                        alignment_problem)
            chunk_problem.subtree = alignment_problem.subtree
            chunk_label = alignment_problem.label.replace("A_", "FC_")
            chunk_problem.label = chunk_label + "_" + str(chunk_index)
            chunk_problem.fragments = chunk_files[chunk_index]

    _LOG.info("Breaking each alignment subset into %d fragment chunks."
              % self.root_problem.fragment_chunks)
    _LOG.debug("Subproblem structure: %s" % str(self.root_problem))
    return self.root_problem
def build_subproblems(self):
    """Build the nested subproblem hierarchy below ``self.root_problem``.

    Reads the input alignment and tree, creates the root problem, splits the
    tree into placement subsets, splits each placement subset into alignment
    subsets, and finally hangs one fragment-chunk problem per fragment chunk
    file under every alignment problem.

    Returns:
        ``self.root_problem``.

    Raises:
        AssertionError: the inputs have unexpected types, or one of the
            decompositions produced no subsets.
    """
    (alignment, tree) = self.read_alignment_and_tree()
    assert isinstance(tree, PhylogeneticTree)
    assert isinstance(alignment, MutableAlignment)

    tree.get_tree().resolve_polytomies()
    # Label edges with numbers so that we could assemble things back
    # at the end
    tree.lable_edges()

    # Make sure size values are set, and are meaningful.
    self.check_and_set_sizes(alignment.get_num_taxa())

    self._create_root_problem(tree, alignment)

    # Decompose the tree based on placement subsets
    placement_tree_map = PhylogeneticTree(
        Tree(tree.den_tree)).decompose_tree(
            self.options.placement_size,
            strategy=self.strategy,
            minSize=self.minsubsetsize,
            tree_map={})
    assert len(placement_tree_map) > 0, (
        "Tree could not be decomposed"
        " given the following settings; strategy:%s minsubsetsize:%s"
        " placement_size:%s"
        % (self.strategy, self.minsubsetsize, self.options.placement_size))
    _LOG.info("Breaking into %d placement subsets."
              % len(placement_tree_map))

    # For placement subsets create a placement subproblem, and decompose
    # further
    for (p_key, p_tree) in placement_tree_map.items():
        assert isinstance(p_tree, PhylogeneticTree)
        placement_problem = SeppProblem(p_tree.leaf_node_names(),
                                        self.root_problem)
        placement_problem.subtree = p_tree
        placement_problem.label = "P_%s" % str(p_key)
        _LOG.debug("Placement subset %s has %d nodes"
                   % (placement_problem.label,
                      len(p_tree.leaf_node_names())))

        # Further decompose to alignment subsets
        alignment_tree_map = PhylogeneticTree(
            Tree(p_tree.den_tree)).decompose_tree(
                self.options.alignment_size,
                strategy=self.strategy,
                minSize=self.minsubsetsize,
                tree_map={},
                decomp_strategy=self.options.decomp_strategy)
        assert len(alignment_tree_map) > 0, (
            "Tree could not be decomposed"
            " given the following settings; strategy:%s minsubsetsize:%s"
            " alignmet_size:%s"
            % (self.strategy, self.minsubsetsize,
               self.options.alignment_size))

        _LOG.debug("Placement subset %s has %d alignment subsets: %s"
                   % (placement_problem.label,
                      len(alignment_tree_map),
                      str(sorted(alignment_tree_map.keys()))))
        _LOG.debug("Placement subset %s has %d taxa:"
                   % (placement_problem.label,
                      sum(len(a_tree.leaf_node_names())
                          for a_tree in alignment_tree_map.values())))

        for (a_key, a_tree) in alignment_tree_map.items():
            assert isinstance(a_tree, PhylogeneticTree)
            self.modify_tree(a_tree)
            alignment_problem = SeppProblem(a_tree.leaf_node_names(),
                                            placement_problem)
            alignment_problem.subtree = a_tree
            alignment_problem.label = "A_%s_%s" % (str(p_key), str(a_key))

    # Report the alignment-subset count now, while the leaves of the problem
    # tree are still the alignment problems. Once fragment-chunk problems
    # are attached below, counting leaves would presumably count chunk
    # problems instead (assuming iter_leaves() yields childless problems).
    _LOG.info("Breaking into %d alignment subsets."
              % (len(list(self.root_problem.iter_leaves()))))

    # Divide fragments into chunks, to help achieve better parallelism
    fragment_chunk_files = self.create_fragment_files()
    chunk_count = len(fragment_chunk_files)
    for alignment_problem in self.root_problem.iter_leaves():
        # range() rather than xrange(): xrange does not exist on Python 3,
        # and range() iterates the same indices on Python 2.
        for afc in range(chunk_count):
            frag_chunk_problem = SeppProblem(alignment_problem.taxa,
                                             alignment_problem)
            frag_chunk_problem.subtree = alignment_problem.subtree
            frag_chunk_problem.label = alignment_problem.label.replace(
                "A_", "FC_") + "_" + str(afc)
            frag_chunk_problem.fragments = fragment_chunk_files[afc]

    _LOG.info("Breaking each alignment subset into %d fragment chunks."
              % chunk_count)
    _LOG.info("Subproblem structure: %s" % str(self.root_problem))
    return self.root_problem