def _data_to_object(self, data, schema=None):
    """Parse tree data with DendroPy and attach the result to ``self._Tree``.

    Args:
        data: Tree data handed to ``DendropyTree.get(data=...)``.
        schema: Name of the DendroPy schema to parse with. When ``None``,
            ``"newick"`` and then ``"nexus"`` are tried and the first schema
            that parses is used.

    Returns:
        ``self._Tree``, after its ``_DendroPy`` attribute has been set to the
        parsed tree and ``_construct()`` has been called.

    Raises:
        Exception: if ``schema`` is ``None`` and no candidate schema parses.
        Any error raised by ``DendropyTree.get`` when an explicit ``schema``
        is given, or by ``self._Tree._construct()``, propagates unchanged.
    """
    if schema is not None:
        tree = DendropyTree.get(data=data, schema=schema)
    else:
        tree = None
        for candidate in ("newick", "nexus"):
            # Probing is best-effort: a parse failure just means "try the
            # next schema". ``except Exception`` (rather than a bare except)
            # lets KeyboardInterrupt / SystemExit through.
            try:
                tree = DendropyTree.get(data=data, schema=candidate)
            except Exception:
                continue
            # Stop at the first schema that works instead of re-parsing.
            break
        if tree is None:
            raise Exception(
                """Tree data doesn't seem to be in a format that DendroPy can read."""
            )

    # Attach outside any try block so that genuine errors raised while
    # constructing the tree object are not misreported as format problems.
    self._Tree._DendroPy = tree
    self._Tree._construct()
    return self._Tree
def add_outgroup(tree, relative_additional_height):
    """Return a new tree made of ``tree`` plus a sister leaf named "outgroup".

    The outgroup's edge length is ``(1 + relative_additional_height)`` times
    ``tree.seed_node.distance_from_tip()``.

    Note that the seed node of ``tree`` is attached (not copied) beneath the
    freshly created root; only the taxon namespace is deep-copied.

    Args:
        tree: Source tree; must expose ``seed_node`` and ``taxon_namespace``.
        relative_additional_height: Factor added to 1 when scaling the
            outgroup edge length.

    Returns:
        A ``Tree`` re-read from the Newick text of the combined tree.
    """
    original_root = tree.seed_node
    outgroup_length = (
        1 + relative_additional_height) * original_root.distance_from_tip()
    outgroup_leaf = Node(taxon=Taxon("outgroup"), edge_length=outgroup_length)

    # Work on a private copy of the namespace that also knows the new taxon.
    namespace = deepcopy(tree.taxon_namespace)
    namespace.add_taxon(outgroup_leaf.taxon)

    # New root with the outgroup first and the old root second.
    joint_root = Node()
    for child in (outgroup_leaf, original_root):
        joint_root.add_child(child)

    combined = Tree(taxon_namespace=namespace)
    combined.seed_node = joint_root

    # Building the tree directly produced taxon namespace errors, so the
    # result is serialised and re-parsed from Newick instead.
    newick_text = "".join((str(combined), ";"))
    return Tree.get(data=newick_text, schema="newick")
def get_alignment_decomposition_tree(self, p_tree):
    """Return the tree to use for alignment decomposition of ``p_tree``.

    When ``self.options.alignment_decomposition_tree`` is ``None`` the result
    is a ``PhylogeneticTree`` built from ``Tree(p_tree.den_tree)``. Otherwise
    the option is read as a Newick stream, which is only permitted when
    ``p_tree`` has as many leaves as ``self.root_problem.subtree``.

    Args:
        p_tree: The placement tree; must be a ``PhylogeneticTree``.

    Returns:
        A ``PhylogeneticTree``.

    Raises:
        ValueError: if a separate decomposition tree is configured but the
            leaf counts of ``p_tree`` and the root subtree differ.
    """
    assert isinstance(p_tree, PhylogeneticTree)

    # No dedicated decomposition tree configured: reuse the placement tree.
    if self.options.alignment_decomposition_tree is None:
        return PhylogeneticTree(Tree(p_tree.den_tree))

    root_subtree = self.root_problem.subtree
    if p_tree.count_leaves() != root_subtree.count_leaves():
        raise ValueError("Alignment decomposition tree can be different from placement tree only if placement subset size is set to the number of taxa (i.e. entire tree)")

    _LOG.info("Reading alignment decomposition input tree: %s" % (
        self.options.alignment_decomposition_tree))
    parsed = dendropy.Tree.get_from_stream(
        self.options.alignment_decomposition_tree,
        schema="newick",
        preserve_underscores=True,
        taxon_set=self.root_problem.subtree.get_tree().taxon_set)
    return PhylogeneticTree(parsed)
def _data_to_object(self, data, schema=None):
    """Parse tree data with DendroPy and attach the result to ``self._Tree``.

    Args:
        data: Tree data handed to ``DendropyTree.get(data=...)``.
        schema: Name of the DendroPy schema to parse with. When ``None``,
            ``"newick"`` and then ``"nexus"`` are tried and the first schema
            that parses is used.

    Returns:
        ``self._Tree``, after its ``_DendroPy`` attribute has been set to the
        parsed tree and ``_construct()`` has been called.

    Raises:
        Exception: if ``schema`` is ``None`` and no candidate schema parses.
        Any error raised by ``DendropyTree.get`` when an explicit ``schema``
        is given, or by ``self._Tree._construct()``, propagates unchanged.
    """
    if schema is not None:
        tree = DendropyTree.get(data=data, schema=schema)
    else:
        tree = None
        for candidate in ("newick", "nexus"):
            # Probing is best-effort: a parse failure just means "try the
            # next schema". ``except Exception`` (rather than a bare except)
            # lets KeyboardInterrupt / SystemExit through.
            try:
                tree = DendropyTree.get(data=data, schema=candidate)
            except Exception:
                continue
            # Stop at the first schema that works instead of re-parsing.
            break
        if tree is None:
            raise Exception(
                """Tree data doesn't seem to be in a format that DendroPy can read."""
            )

    # Attach outside any try block so that genuine errors raised while
    # constructing the tree object are not misreported as format problems.
    self._Tree._DendroPy = tree
    self._Tree._construct()
    return self._Tree
def build_subproblems(self):
    """Build the nested subproblem structure and return ``self.root_problem``.

    Steps, in order:

    1. Read the alignment and tree; compute distances unless
       ``options().distance`` equals 1.
    2. Resolve polytomies, label edges, validate sizes and create the root
       problem.
    3. Decompose the tree into placement subsets and create one
       ``SeppProblem`` per subset.
    4. Decompose every placement subset into alignment subsets and create
       one ``SeppProblem`` per alignment subset.
    5. Create fragment files and, for every leaf of the root problem, one
       ``SeppProblem`` per fragment chunk file.

    Returns:
        ``self.root_problem``.

    Raises:
        AssertionError: if the tree/alignment are not of the expected types
            or if either decomposition returns an empty map.
    """
    (alignment, tree) = self.read_alignment_and_tree()

    # Distances are only computed when the configured distance is not 1.
    if options().distance != 1:
        self.compute_distances(alignment)

    assert isinstance(tree, PhylogeneticTree)
    assert isinstance(alignment, MutableAlignment)

    tree.get_tree().resolve_polytomies()
    # Label edges with numbers so that we could assemble things back
    # at the end
    tree.lable_edges()

    ''' Make sure size values are set, and are meaningful. '''
    self.check_and_set_sizes(alignment.get_num_taxa())

    # Presumably this is what initialises self.root_problem, which is used
    # below -- TODO confirm.
    self._create_root_problem(tree, alignment)

    ''' Decompose the tree based on placement subsets'''
    # NOTE(review): this call reads ``self.decomp_strategy`` while the
    # alignment-level call below reads ``self.options.decomp_strategy`` --
    # confirm the difference is intended.
    # NOTE(review): on Python 3 ``/`` is true division, so ``minSize`` may be
    # a float here -- confirm decompose_tree accepts that.
    placement_tree_map = PhylogeneticTree(Tree(
        tree.den_tree)).decompose_tree(
            self.options.placement_size,
            strategy=self.strategy,
            minSize=self.options.placement_size /
            int(self.options.exhaustive.placementminsubsetsizefacotr),
            tree_map={},
            pdistance=1,
            decomp_strategy=self.decomp_strategy,
            distances=self.distances,
            maxDiam=None)
    assert len(placement_tree_map) > 0, (
        "Tree could not be decomposed"
        " given the following settings; strategy:%s minsubsetsize:%s"
        " placement_size:%s" %
        (self.strategy, self.minsubsetsize, self.options.placement_size))
    _LOG.info("Breaking into %d placement subsets."
              % len(placement_tree_map))

    ''' For placement subsets create a placement subproblem, and decompose further'''
    for (p_key, p_tree) in placement_tree_map.items():
        assert isinstance(p_tree, PhylogeneticTree)
        # Second constructor argument is presumably the parent problem --
        # verify against SeppProblem.
        placement_problem = SeppProblem(p_tree.leaf_node_names(),
                                        self.root_problem)
        placement_problem.subtree = p_tree
        placement_problem.label = "P_%s" % str(p_key)
        _LOG.debug(
            "Placement subset %s has %d nodes" %
            (placement_problem.label, len(p_tree.leaf_node_names())))
        ''' Further decompose to alignment subsets '''
        alignment_tree_map = PhylogeneticTree(Tree(
            p_tree.den_tree)).decompose_tree(
                self.options.alignment_size,
                strategy=self.strategy,
                minSize=self.minsubsetsize,
                tree_map={},
                decomp_strategy=self.options.decomp_strategy,
                pdistance=options().distance,
                distances=self.distances,
                maxDiam=self.options.maxDiam)
        assert len(alignment_tree_map) > 0, (
            "Tree could not be decomposed"
            " given the following settings; strategy:%s"
            " minsubsetsize:%s alignmet_size:%s" %
            (self.strategy, self.minsubsetsize,
             self.options.alignment_size))

        _LOG.debug("Placement subset %s has %d alignment subsets: %s" %
                   (placement_problem.label, len(alignment_tree_map),
                    str(sorted(alignment_tree_map.keys()))))
        # Logged taxa count is the sum of leaf names over all alignment
        # subsets of this placement subset.
        _LOG.debug("Placement subset %s has %d taxa:" %
                   (placement_problem.label, sum([
                       len(a_tree.leaf_node_names())
                       for a_tree in alignment_tree_map.values()
                   ])))
        for (a_key, a_tree) in alignment_tree_map.items():
            assert isinstance(a_tree, PhylogeneticTree)
            self.modify_tree(a_tree)
            alignment_problem = SeppProblem(a_tree.leaf_node_names(),
                                            placement_problem)
            alignment_problem.subtree = a_tree
            alignment_problem.label = "A_%s_%s" % (str(p_key), str(a_key))

    # At this point the leaves of the root problem are counted and reported
    # as the alignment subsets.
    _LOG.info("Breaking into %d alignment subsets."
              % (len(list(self.root_problem.iter_leaves()))))

    ''' Divide fragments into chunks, to help achieve better parallelism'''
    fragment_chunk_files = self.create_fragment_files()
    self.root_problem.fragment_chunks = len(fragment_chunk_files)
    for alignment_problem in self.root_problem.iter_leaves():
        for afc in range(0, self.root_problem.fragment_chunks):
            # One fragment-chunk problem per (alignment subset, chunk file);
            # its label is the alignment label with "A_" replaced by "FC_"
            # plus the chunk index.
            frag_chunk_problem = SeppProblem(alignment_problem.taxa,
                                             alignment_problem)
            frag_chunk_problem.subtree = alignment_problem.subtree
            frag_chunk_problem.label = alignment_problem.label.replace(
                "A_", "FC_") + "_" + str(afc)
            frag_chunk_problem.fragments = fragment_chunk_files[afc]

    _LOG.info("Breaking each alignment subset into %d fragment chunks." %
              self.root_problem.fragment_chunks)
    _LOG.debug("Subproblem structure: %s" % str(self.root_problem))
    return self.root_problem
def launch_alignment(self, context_str=None):
    '''Set up the child jobs for merging the subsets in ``self.tree``.

    With exactly two nodes in the tree, the two subset jobs looked up in
    ``self.pasta_team.subsets`` (by node label) are linked as children of
    this job. Otherwise the tree is rerooted near its centroid edge, one
    ``PASTAMergerJob`` is created per child of the new root, and each of
    those jobs has ``launch_alignment()`` called on it in turn.

    Args:
        context_str: Stored on ``self.context_str``; ``None`` is replaced
            by the empty string.

    Raises:
        RuntimeError: if ``self.killed`` is set at any of the checkpoints.
    '''
    if self.killed:
        raise RuntimeError("PastaAligner Job killed")

    self._reset_jobs()
    self.context_str = context_str
    if self.context_str is None:
        self.context_str = ''
    node_count = self.tree.count_nodes()
    _LOG.debug("Recursive merge on a branch with %d subsets" % (node_count))
    # Only the first 200 characters of the Newick text are logged.
    prefix = "subsets tree: %s" % self.tree.compose_newick()[0:200]
    if node_count == 2:
        # Two nodes: merge the two corresponding subset jobs directly.
        nodes = self.tree._tree.nodes()
        _LOG.debug("%s ... pairwise merge " % prefix)
        self.skip_merge = False
        self.subjob1 = self.pasta_team.subsets[nodes[0].label]
        self.subjob2 = self.pasta_team.subsets[nodes[1].label]

        self.subjob1.add_parent(self)
        self.add_child(self.subjob1)

        self.subjob2.add_parent(self)
        self.add_child(self.subjob2)
    else:
        _LOG.debug("%s ... recursing further " % prefix)
        self.skip_merge = True

        # Reroot near centroid edge
        ce = self.tree.get_centroid_edge(spanning=True)
        # Use the edge's head node as the new root unless it is a leaf, in
        # which case use the tail node.
        nr = ce.head_node if not ce.head_node.is_leaf() else ce.tail_node
        self.tree._tree.reroot_at_node(nr, suppress_unifurcations=False)
        _LOG.debug("rerooted to: %s ..." %
                   self.tree.compose_newick()[0:200])
        # For each path from root to its children, create a new merge job
        merge_job_list = []
        nr = self.tree._tree.seed_node
        children = nr.child_nodes()
        for keepchild in children:
            # Temporarily detach every child except ``keepchild`` ...
            remchilds = []
            for remchild in children:
                if remchild != keepchild:
                    remchilds.append(
                        nr.reversible_remove_child(
                            remchild, suppress_unifurcations=False))
            # ... build a tree from what is left (presumably a copy of the
            # pruned tree -- TODO confirm Tree(...) copies) ...
            t1 = PhylogeneticTree(Tree(self.tree._tree))
            # ... and reattach the removed children in reverse removal order.
            remchilds.reverse()
            for child in remchilds:
                nr.reinsert_nodes(child)
            _LOG.debug("child = %s ..."
                       % t1.compose_newick()[0:200])
            multilocus_dataset1 = self.multilocus_dataset.new_with_shared_meta(
            )

            # A two-node subtree gets a pairwise temp dir named after its
            # two node labels; anything else uses the base temp dir.
            if t1.count_nodes() == 2:
                ns = t1._tree.nodes()
                tmp_dir_par = self.get_pairwise_temp_dir(
                    ns[0].label, ns[1].label)
            else:
                tmp_dir_par = self.tmp_base_dir
            configuration = self.configuration()
            cj = PASTAMergerJob(multilocus_dataset=multilocus_dataset1,
                                pasta_team=self.pasta_team,
                                tree=t1,
                                tmp_base_dir=self.tmp_base_dir,
                                tmp_dir_par=tmp_dir_par,
                                delete_temps2=False,
                                **configuration)
            cj.add_parent(self)
            self.add_child(cj)
            merge_job_list.append(cj)

        self.merge_job_list = merge_job_list

        # now launch these new merge jobs
        for merge_job in self.merge_job_list:
            # Re-check the kill flag before launching each child job.
            if self.killed:
                raise RuntimeError("PastaAligner Job killed")
            merge_job.launch_alignment()

        self._merge_queued_event.set()

        if self.killed:
            raise RuntimeError("PastaAligner Job killed")
    return
# For every dataset, compare each method's predicted tree for replicates
# R0..R19 against the reference tree and write one CSV row per replicate to
# ``result_<dataset>_nj.txt``. Replicates whose predicted tree file is missing
# or empty are recorded as ``err,err``.
for data in dataset:
    # ``with`` closes the result file even when loading or comparing a tree
    # raises part-way through the dataset.
    with open('result_{}_nj.txt'.format(data), 'w') as result_file:
        for method in distance_methods:
            for i in range(20):
                replicate = 'R' + str(i)
                true_tree_file = '../../{}/{}/R{}/rose.tt'.format(
                    data, data, i)
                predicted_tree_file = (data + '/' + method + '/R' + str(i)
                                       + '/out_tree.nwk')
                if (not os.path.isfile(predicted_tree_file)
                        or os.stat(predicted_tree_file).st_size == 0):
                    result_file.write(method + ',' + replicate + ',err,err\n')
                    continue

                # Both trees share the namespace ``tns`` so that they can be
                # compared with each other.
                tree1 = Tree.get_from_path(
                    predicted_tree_file, "newick", taxon_namespace=tns)
                tree2 = Tree.get_from_path(
                    true_tree_file, "newick", taxon_namespace=tns)
                tree1.encode_bipartitions()
                tree2.encode_bipartitions()

                # Compute the comparison once and reuse it for both the
                # console output and the result row.
                errors = treecompare.false_positives_and_negatives(
                    tree1, tree2)
                print(replicate, errors)
                result_file.write(
                    method + ',' + replicate + ','
                    + ','.join([str(x) for x in errors]) + '\n')
def nexus(self, data):
    """Read NEXUS-formatted tree data.

    Delegates to ``self._data_to_object`` with ``schema="nexus"`` and returns
    whatever that method returns.

    The raw ``data`` is forwarded as-is: ``_data_to_object`` does the parsing
    itself, so handing it an already-parsed tree would make it try to parse a
    tree object as text.

    Args:
        data: NEXUS tree data.
    """
    return self._data_to_object(data, schema="nexus")