def get_sum_of_branches(treepath):
    """Parse the first line of a Newick file and return the tree's length.

    Parameters
    ----------
    treepath : str
        Path to a text file whose first line holds a Newick string.

    Returns
    -------
    Whatever ``Tree.length()`` reports for the parsed tree (the function
    name indicates this is the sum of the branch lengths).
    """
    # The context manager closes the handle even when reading or parsing
    # raises, which a trailing close() call cannot guarantee.
    with open(treepath, "r") as fin:
        newick = fin.readline().strip()
    t = Tree()
    t.read_from_string(newick, "newick")
    return t.length()
def __init__(self, **kwargs):
    '''
    Load the tree to be re-rooted and, optionally, a reference tree.

    Parameters
    ----------
    reference_tree_path: str
        Optional path to a newick file holding the reference tree. When
        it is omitted (or falsy) no file is read and self.reference_tree
        is set to the value that was passed.
    tree_path: str
        Required path to a newick file holding the tree to be re-rooted.

    Raises
    ------
    KeyError
        If tree_path is not supplied.
    Exception
        If any keyword argument other than the two above is supplied.
        This check runs after the trees have been loaded.
    '''
    ref_path = kwargs.pop('reference_tree_path', None)
    main_path = kwargs.pop('tree_path')

    logging.debug("Importing old tree from file: %s" % main_path)
    self.tree = Tree.get(path=main_path, schema='newick')

    if not ref_path:
        self.reference_tree = ref_path
    else:
        logging.debug("Importing reference tree from file: %s" % ref_path)
        self.reference_tree = Tree.get(path=ref_path, schema='newick')

    # Whatever is still left in kwargs was not recognised.
    if kwargs:
        raise Exception("Unexpected arguments provided to Decorator class: %s" % kwargs)
def __bisect__(t,e):
    """Cut tree ``t`` at edge ``e`` and return both pieces as ``(t, t1)``.

    The head node of ``e`` is detached from its parent and becomes the seed
    node of a new tree ``t1``. When that leaves the parent with exactly one
    child, the parent is spliced out of ``t`` and its edge length is added
    to the remaining child's edge length.

    NOTE(review): this block reached review with its line breaks collapsed.
    The nesting below (in particular the ``while`` loop placed after the
    ``if`` block rather than inside it) is a reconstruction -- confirm it
    against the original file.
    """
    # e = __find_centroid_edge__(t)
    u = e.tail_node
    v = e.head_node

    # Detach the subtree below e; it becomes its own tree.
    u.remove_child(v)
    t1 = Tree(seed_node = v)

    if u.num_child_nodes() == 1:
        # u now has a single child, so remove u and reattach that child.
        p = u.parent_node
        v = u.child_nodes()[0]
        l_v = v.edge_length
        u.remove_child(v)
        if p is None: # u is the seed_node; this means the tree runs out of all but one side
            t.seed_node = v
            # Early exit: no ancestor update and no ``annotated`` flag is set here.
            return t,t1
        l_u = u.edge_length
        p.remove_child(u)
        p.add_child(v)
        # The merged edge carries the combined length of the two it replaces.
        v.edge_length = l_u+l_v
        u = p

    # Walk from the cut point up to the root, refreshing each ancestor with
    # __updateNode__ (defined elsewhere in the file).
    while u is not None:
        __updateNode__(u)
        u = u.parent_node

    t.annotated = True
    t1.annotated = True

    return t,t1
def generate_ATT_from_files(seqaln, mattype, workdir, treefile, otu_json, ingroup_mrca=None):
    """Build an ATT object without phylesystem.

    Parameters
    ----------
    seqaln : str
        Path to the sequence alignment.
    mattype : str
        DendroPy schema name of the alignment file.
    workdir : str
        Passed through to ``AlignTreeTax``.
    treefile : str
        Path to a newick tree whose tips share the alignment's taxa.
    otu_json : str
        Path to a JSON file that maps OTU labels to their metadata dicts.
    ingroup_mrca : optional
        OTT id of the ingroup MRCA. When it is falsy, the MRCA is computed
        with ``get_mrca_ott`` from the ``'^ot:ottId'`` value of every entry
        in the OTU dictionary.

    Returns
    -------
    The ``AlignTreeTax`` built from the tree, OTU dictionary and alignment.

    Raises
    ------
    AssertionError
        If an alignment label is missing from the OTU dictionary.
    """
    aln = DnaCharacterMatrix.get(path=seqaln, schema=mattype)
    for tax in aln.taxon_namespace:
        tax.label = tax.label.replace(" ", "_")  # Forcing all spaces to underscore UGH

    with open(otu_json) as data_file:
        otu_dict = json.load(data_file)
    for tax in aln:
        assert tax.label in otu_dict, "%s is not in the OTU dictionary" % tax.label

    # Parse the tree once, after the labels have been normalised.
    tre = Tree.get(path=treefile,
                   schema="newick",
                   preserve_underscores=True,
                   taxon_namespace=aln.taxon_namespace)
    otu_newick = tre.as_string(schema="newick")

    if ingroup_mrca:
        ott_mrca = int(ingroup_mrca)
    else:
        # dict.get must be called; subscripting the bound method raises TypeError.
        ott_ids = [otu_dict[otu].get('^ot:ottId') for otu in otu_dict]
        ott_mrca = get_mrca_ott(ott_ids)

    return AlignTreeTax(otu_newick, otu_dict, aln, ingroup_mrca=ott_mrca, workdir=workdir)
def get_subtree(self, taxa):
    """Return a PhylogeneticTree built from a pruned copy of this tree.

    ``taxa`` is a sequence of label strings or of ``Taxon`` objects; the
    type of its first element decides whether ``prune_taxa_with_labels`` or
    ``prune_taxa`` is applied to the copy. Elements of any other type leave
    the copy unpruned. An empty ``taxa`` yields ``None``.
    """
    if len(taxa) == 0:
        return None

    subtree = Tree(self._tree)
    first = taxa[0]
    if isinstance(first, str):
        subtree.prune_taxa_with_labels(taxa)
    elif isinstance(first, Taxon):
        subtree.prune_taxa(taxa)
    return PhylogeneticTree(subtree)
def scale_tree_branch(tree, format="newick"):
    """Divide all branch lengths by 100 when any of them exceeds 1.

    Parameters
    ----------
    tree : Tree or str
        A ``Tree`` instance, a path to an existing tree file, or a string
        holding the tree itself.
    format : str
        Schema used when the tree has to be parsed.

    Returns
    -------
    The (possibly rescaled) tree as returned by ``as_newick_string()``.

    Raises
    ------
    TypeError
        If ``tree`` is none of the supported kinds of input.
    """
    # Test for a Tree instance first: os.path.exists() must not be handed a
    # Tree object, and the instance itself (not the class) has to be used.
    if isinstance(tree, Tree):
        tree_obj = tree
    elif os.path.exists(tree):
        tree_obj = Tree.get_from_path(tree, format)
    elif isinstance(tree, str):
        tree_obj = Tree(stream=StringIO(tree), schema=format)
    else:
        raise TypeError("tree must be a Tree, a file path or a tree string")

    # Edges without a length are ignored rather than compared against 1.
    needs_scaling = any(
        e.length is not None and e.length > 1
        for e in tree_obj.postorder_edge_iter()
    )
    if needs_scaling:
        for e in tree_obj.postorder_edge_iter():
            if e.length is not None:
                e.length = e.length / 100
    return tree_obj.as_newick_string()
def test_bootstraps_in_annotated_tree_alongside_empty_taxa(self):
    """Tips below a quoted 'bootstrap:taxon' node label get that taxon;
    the remaining tips get an empty taxonomy."""
    tree = Tree.get(data="(a,(b,(c,d:0.2)'0.2:tax')0.01973:0.9)root;", schema='newick')
    observed = TaxonomyExtractor().taxonomy_from_annotated_tree(tree)
    # assertEqual replaces the assertEquals alias, which was removed in Python 3.12.
    self.assertEqual({u'a': [], u'b': [], u'c': ['tax'], u'd': ['tax']}, observed)
def __init__(self, workDir, resultsFile, inFile, coreId, seqType, seedNum, bootNum, method, interLeaved):
    '''
    Store the settings of one bootstrap run on the instance.

    Data Fields:
    work_dir    = temporary directory
    resultsFile = stored unchanged
    inFile      = data file
    coreId      = core id (int); also used to name outFile
    seqType     = d (dna); p (protein); r (rna)
    seedNum     = random number seed between 1 and 32767
    bootNum     = number of replicates (also stored as n)
    method      = b (Bootstrap, default), j (Jackknife),
                  c (Permute species for each character),
                  o (Permute character order),
                  s (Permute within species), r (Rewrite data)
    interLeaved = True if sequence data is interleaved otherwise False

    nt is True when seqType is 'r' or 'd', otherwise False.
    '''
    # Values taken straight from the arguments.
    self.work_dir = workDir
    self.resultsFile = resultsFile
    self.inFile = inFile
    self.coreId = coreId
    self.seqType = seqType
    self.seedNum = seedNum
    self.bootNum = bootNum
    self.method = method
    self.interLeaved = interLeaved

    # Values derived from the arguments.
    self.n = bootNum
    self.nt = seqType in ('r', 'd')
    self.outFile = "bootstrap_" + str(coreId) + ".out"

    # Containers that start out empty.
    self.newSpeciesTree = Tree()
    self.leafLabelStree = []
    self.internalExRootSpeceLabels = []
def readTreeFromFile(treePath):
    '''
    input: path to the file containing newick tree
    return: Tree object parsed with comment metadata extraction enabled
    '''
    return Tree.get_from_path(
        treePath,
        'newick',
        annotations_as_nhx=True,
        extract_comment_metadata=True,
        suppress_annotations=False,
    )
def get_bls(tree_path):
    """Return the branch lengths of the newick tree stored at ``tree_path``.

    Edges are visited in level order and edges without a length are
    skipped, so only branch lengths end up in the returned list.
    """
    t = Tree()
    t.read_from_path(tree_path, "newick")
    # A plain iteration works on Python 2 and 3 alike, unlike calling
    # the iterator's .next() method by hand.
    return [e.length for e in t.level_order_edge_iter() if e.length is not None]
def main():
    """Run pb_mpi and then readpb_mpi on an alignment and tree.

    Command line: ``<cpu> <job_name> [<alnfile> [<treefile>]]``.

    The tree is rewritten to ``temp.tre`` before pb_mpi is started, and an
    AssertionError is raised when either external command exits non-zero.
    """
    cpu = sys.argv[1]
    job_name = sys.argv[2]

    # A missing positional argument is the only expected failure here, so
    # catch IndexError instead of every exception.
    # NOTE(review): `restart` is not defined in this function; presumably a
    # module-level flag -- confirm. When it is True the file names stay
    # unbound and the code below cannot run as written.
    try:
        alnfile = sys.argv[3]
    except IndexError:
        assert(restart is True), "Specified alignment file does not exist. Path?"
    try:
        treefile = sys.argv[4]
    except IndexError:
        assert(restart is True), "Specified tree file does not exist. Path?"

    # Rewrite tree to create trifurcating root, as needed by phylobayes mpi
    tree = Tree.get_from_path(treefile, "newick", rooting = "force-unrooted")
    tree.resolve_polytomies() # in case of polytomies.
    tree.update_bipartitions() # this will create a trifurcating root on an unrooted tree
    tstring = str(tree).replace('[&U] ', '')
    with open('temp.tre', 'w') as tf:
        tf.write(tstring + ';\n')

    # Phylobayes is run to chain length 5500, sampling every 5 to yield 1100. Later, burnin of 100 is removed to get a final posterior n=1000 (same procedure as Rodrigue 2013 Genetics)
    pb_call = "mpirun -np " + str(cpu) + " ./pb_mpi -mutsel -cat -d " + alnfile + " -T temp.tre -x 5 1100 " + job_name
    run_pb_call = subprocess.call(pb_call, shell = True)
    assert( run_pb_call == 0 ), "pb_mpi didn't run!"

    # Parse output with readpb_mpi, using a burnin of 100 and saving everything else (posterior size = 1000)
    readpb_call = "mpirun -np " + str(cpu) + " ./readpb_mpi -x 100 1 -1 " + job_name + "\n"
    run_readpb_call = subprocess.call(readpb_call, shell = True)
    assert( run_readpb_call == 0 ), "readpb_mpi didn't run!"
def get_tree_lines(Tname):
    """Return the newick string of every child of every internal node.

    The tree is read from the newick file ``Tname``; internal nodes are
    visited in postorder and their children in stored order.
    """
    from dendropy import Tree

    tree = Tree.get_from_path(Tname, "newick")
    return [
        child.as_newick_string()
        for nd in tree.postorder_internal_node_iter()
        for child in nd.child_nodes()
    ]
def ete_to_dendropy(tree):
    """Convert ``tree`` into a DendroPy tree plus its character matrix.

    The character matrix comes from ``ete_to_dendropy_cm`` and the DendroPy
    tree is parsed from ``tree.write(format=1)`` with the matrix's taxon
    namespace, so both results share one namespace.

    Returns ``(dendro_tree, char_matrix)``.
    """
    from dendropy import Tree as DTree

    char_matrix = ete_to_dendropy_cm(tree)
    newick = tree.write(format=1)
    dendro_tree = DTree.get(
        data=newick,
        schema='newick',
        taxon_namespace=char_matrix.taxon_namespace,
    )
    return dendro_tree, char_matrix
def bipartition_by_edge(self, e):
    """Prunes the subtree that attached to the head_node of edge e and returns them as a separate tree.

    Returns a pair ``(t1, t2)`` of PhylogeneticTree objects: ``t1`` is
    rooted at the head node of ``e``, and ``t2`` is rooted at the top-most
    ancestor left over on the tail side. The underlying tree is modified in
    place. When ``e`` carries a ``num_leaves_below`` attribute, the counts
    on the ancestor edges are reduced by the number of leaves moved to
    ``t1``.
    """
    t = self._tree
    nr = e.head_node
    assert e.tail_node is not None
    assert e.head_node is not None
    assert nr.parent_node is e.tail_node
    is_valid_tree(t)

    n = self.n_leaves
    # Removing the child with suppress_unifurcations=True may delete the
    # tail node, so remember it and its parent beforehand.
    potentially_deleted_nd = e.tail_node
    grandparent_nd = potentially_deleted_nd.parent_node
    e.tail_node.remove_child(nr, suppress_unifurcations=True)

    # Turn the detached node into the root of its own tree.
    nr.edge.length = None
    nr.parent_node = None
    convert_node_to_root_polytomy(nr)
    t1 = PhylogeneticTree(Tree(seed_node=nr))
    n1 = t1.n_leaves # temp we could speed this up, by telling the Phylogenetic tree how many leaves it has

    if hasattr(e, "num_leaves_below"):
        # Subtract the pruned leaves from every edge above the cut while
        # climbing to the root of what remains.
        if grandparent_nd is None:
            old_root = potentially_deleted_nd
            if old_root.edge:
                old_root.edge.num_leaves_below -= n1
        else:
            # Only adjust the tail node's edge if it is still in the tree.
            if potentially_deleted_nd in grandparent_nd.child_nodes():
                potentially_deleted_nd.edge.num_leaves_below -= n1
            old_root = grandparent_nd
            if old_root.edge:
                old_root.edge.num_leaves_below -= n1
            while old_root.parent_node:
                old_root = old_root.parent_node
                if old_root.edge:
                    old_root.edge.num_leaves_below -= n1
    else:
        # No leaf counts to maintain: just climb to the remaining root.
        old_root = grandparent_nd or potentially_deleted_nd
        while old_root.parent_node:
            old_root = old_root.parent_node

    t2 = PhylogeneticTree(Tree(seed_node=old_root))

    is_valid_tree(t1._tree)
    is_valid_tree(t2._tree)
    return t1, t2
def readTreeFromString(self, treeString):
    '''
    input: string containing newick tree
    return: Tree object parsed with comment metadata extraction enabled
    '''
    # The parser builds the tree itself, so no empty Tree is created first.
    return Tree.get_from_string(
        treeString,
        'newick',
        annotations_as_nhx=True,
        extract_comment_metadata=True,
        suppress_annotations=False,
    )
def remove_internal_labels(strtree):
    """Clear node labels whose integer value is at least ``tips_number``.

    ``strtree`` is a newick string; ``tips_number`` is read from outside
    this function. Every node label that is present is converted with
    ``int``, so a non-numeric label raises ValueError. The edited tree is
    returned as a newick string.
    """
    tree = Tree.get_from_string(strtree, schema='newick')
    for node in tree.postorder_node_iter():
        label = node.label
        if label is None:
            continue
        if int(label) >= tips_number:
            node.label = None
    return tree.as_string(schema="newick")
def read_lsd_results(inputDir):
    """Build a start vector from the LSD output found in ``inputDir``.

    Expects LSD to have been run on "mytree.tre" with its outputs
    ("mytree.tre.result" and "mytree.tre.result.newick") in ``inputDir``.

    Returns
    -------
    list
        ``2n - 2`` per-branch entries (n = number of leaves of the input
        tree) followed by the rate read from the log. An entry is the
        result-tree edge length divided by the input-tree edge length for
        the same bipartition when both are positive, else ``10**-10``.

    Raises
    ------
    ValueError
        If the log has no "Tree 1 rate " entry or no number follows it.
    """
    log_file = normpath(join(inputDir, "mytree.tre.result"))
    input_tree_file = normpath(join(inputDir, "mytree.tre"))
    result_tree_file = normpath(join(inputDir, "mytree.tre.result.newick"))

    with open(log_file, 'r') as log:
        s = log.read()

    # Locate the rate and collect digits with at most one decimal point.
    marker = "Tree 1 rate "
    start = s.find(marker)
    if start < 0:
        raise ValueError("No '%s' entry found in %s" % (marker, log_file))
    i = start + len(marker)
    mu = ""
    found_dot = False
    # The length check stops the scan when the number ends the file.
    while i < len(s) and ((s[i] == '.' and not found_dot) or s[i] in "0123456789"):
        mu += s[i]
        if s[i] == '.':
            found_dot = True
        i += 1
    mu = float(mu)

    taxa = TaxonNamespace()
    tree = Tree.get_from_path(input_tree_file, schema="newick", taxon_namespace=taxa, rooting="force-rooted")
    tree.encode_bipartitions()
    n = len(list(tree.leaf_node_iter()))
    N = 2*n-2
    x0 = [10**-10]*N + [mu]

    # Give every non-root branch of the input tree a slot in x0, keyed by
    # its bipartition so it can be matched in the result tree.
    idx = 0
    brlen_map = {}
    for node in tree.postorder_node_iter():
        if node is not tree.seed_node:
            brlen_map[node.bipartition] = (idx, node.edge_length)
            idx += 1

    # Both trees share `taxa`, which makes their bipartitions comparable.
    tree2 = Tree.get_from_path(result_tree_file, schema="newick", taxon_namespace=taxa, rooting="force-rooted")
    tree2.encode_bipartitions()
    for node in tree2.postorder_node_iter():
        if node is not tree2.seed_node:
            idx, el = brlen_map[node.bipartition]
            new_el = node.edge_length
            # Branches without a length keep the default entry.
            if el is not None and new_el is not None and el > 0 and new_el > 0:
                x0[idx] = new_el/float(el)
    return x0
def test_branch_lengths(self):
    '''https://github.com/geronimp/graftM/issues/192'''
    tree_path = os.path.join(path_to_data, 'create', 'sulfitereductase.ben.tree')
    taxes = TaxonomyExtractor().taxonomy_from_annotated_tree(
        Tree.get(path=tree_path, schema='newick'))
    # assertEqual replaces the assertEquals alias, which was removed in Python 3.12.
    self.assertEqual([u'Aanerobic sulfite reductase asrC',
                      u'Anaerobic sulfite reductase asrC Group 3',
                      u'Unknown alpha and beta subunits',
                      u'0.856_PFAM_NIR_SIR,NIR_SIR_ferr'], # number is actually in the clade name
                     taxes['T506DRAFT_scaffold00010.10_60~2561511230'])
def index_mutations(con):
    """Builds an index of all mutations.

    For every alignment method / phylogenetic model pair, the ancestral
    cladogram is parsed and ``index_mutations_helper`` is called once per
    edge whose two end nodes both exist and are labelled. The ancestor
    names passed on are "Node" followed by the node label.
    """
    for msaid in get_alignment_method_ids(con):
        for modelid in get_phylo_modelids(con):
            newick = get_anc_cladogram(con, msaid, modelid)
            t = Tree()
            t.read_from_string(newick, "newick")
            for edge in t.preorder_edge_iter():
                head, tail = edge.head_node, edge.tail_node
                if head is None or tail is None:
                    continue
                if head.label is None or tail.label is None:
                    continue
                # A single pre-formatted argument prints the same text
                # under both Python 2 and Python 3.
                print("%s %s %s %s" % (msaid, modelid, head.label, tail.label))
                anc1name = "Node" + str(head.label)
                anc2name = "Node" + str(tail.label)
                index_mutations_helper(con, msaid, modelid, anc1name, anc2name)
def test_remove_sequences_with_named_internal_nodes(self):
    """Removing both tips of a clade nested in a named internal node
    leaves a two-tip tree without the internal label."""
    newick = "('Asulf_Archaeoglobus.1_2280~2522125074':7.17,(('Afulgi_764~2528311132':0.0,'CP006577_764~2588253768':0.0):0.0,'AE000782_746~638154502':0.0)'s__Archaeoglobus fulgidus':7.555):1.461;\n"
    to_remove = ['CP006577_764~2588253768', 'Afulgi_764~2528311132']
    expected = "(Asulf_Archaeoglobus.1_2280~2522125074:7.17,AE000782_746~638154502:7.555):1.461"

    cleaner = DendropyTreeCleaner()
    tree = Tree.get(data=newick, schema='newick')
    cleaner.remove_sequences(tree, to_remove)

    self.assertEqual(expected, str(tree))
def check_list_against_tree(treepath, checklist):
    '''Parse the newick tree file at treepath (read as UTF-8, underscores
    preserved, default-rooted) and hand the tree together with checklist
    to check_list_against_taxa. Nothing is returned.'''
    with open(treepath, 'r', encoding='UTF-8') as handle:
        parsed = Tree.get_from_stream(
            handle,
            schema="newick",
            preserve_underscores=True,
            rooting='default-rooted',
        )
        check_list_against_taxa(parsed, checklist)
def test_yule(script_runner, execution_number, datadir):
    """Run tact_add_taxa with --yule on the stem2 data, then tact_check_results.

    Both commands must exit with status 0. The ASCII plot of the generated
    tree is written to stderr. Returns ``(tacted, taxed, bbone)``: the
    generated tree, the taxonomy tree and the backbone tree.
    ``execution_number`` is accepted but not used in the body.
    """
    taxonomy = os.path.join(datadir, "stem2.taxonomy.tre")
    backbone = os.path.join(datadir, "stem2.backbone.tre")
    # Trees are loaded in the same order as before: taxonomy, then backbone.
    taxed = Tree.get(path=taxonomy, schema="newick")
    bbone = Tree.get(path=backbone, schema="newick", rooting="default-rooted")

    add_taxa_cmd = [
        "tact_add_taxa",
        "--taxonomy", taxonomy,
        "--backbone", backbone,
        "--output", ".tact-pytest-yule",
        "-vv",
        "--yule",
    ]
    added = script_runner.run(*add_taxa_cmd)
    assert added.returncode == 0

    output = ".tact-pytest-yule.newick.tre"
    tacted = Tree.get(path=output, schema="newick", rooting="default-rooted")
    sys.stderr.write(tacted.as_ascii_plot())

    check_cmd = [
        "tact_check_results", output,
        "--taxonomy", taxonomy,
        "--backbone", backbone,
        "--output", ".tact-pytest-yule.check.csv",
        "--cores=1",
    ]
    checked = script_runner.run(*check_cmd)
    assert checked.returncode == 0

    return (tacted, taxed, bbone)
def tree_compare(tempdir):
    """Compare two trees in ``tempdir`` against the reference tree there.

    Reads "ref.tree", "normal_tree" and "red_tree" into one shared taxon
    namespace and returns the symmetric differences
    ``(ref vs normal, ref vs reduced)``.
    """
    tns = dendropy.TaxonNamespace()
    ref, normal, reduced = [
        Tree.get_from_path(tempdir + name, "newick", taxon_namespace=tns)
        for name in ("/ref.tree", "/normal_tree", "/red_tree")
    ]
    for loaded in (ref, normal, reduced):
        loaded.encode_bipartitions()

    distance_normal = treecompare.symmetric_difference(ref, normal)
    distance_reduced = treecompare.symmetric_difference(ref, reduced)
    return distance_normal, distance_reduced
def scale_tree(f_name, n):
    """Multiply every branch length of a newick tree by ``n`` and save it.

    The result goes to a new file named after ``f_name`` with '.mt'
    removed, followed by '_', ``str(n)`` with '.' turned into '_', and
    '.mt'. Edges without a length are left untouched.
    """
    # Both handles are closed deterministically, so the written tree is
    # flushed to disk before the function returns.
    with open(f_name, 'r') as src:
        t = Tree.get(file=src, schema="newick", tree_offset=0)

    for e in t.edges():
        if e.length is not None:
            e.length = float(n*float(e.length))

    out_name = f_name.replace('.mt', '') + '_' + str(n).replace('.', '_') + '.mt'
    with open(out_name, 'w+') as dst:
        t.write(file=dst, schema="newick")
def remove_branch_lengths(f, out):
    """Copy the newick tree in file ``f`` to file ``out`` without branch lengths."""
    with open(f, 'r') as src:
        t = Tree.get(file=src, schema="newick")

    for e in t.edges():
        e.length = None

    # The output is opened only after the input has been read completely,
    # and closing it ensures the tree is flushed to disk.
    with open(out, 'w+') as new:
        t.write(file=new, schema="newick")
def get_tree_and_OTT_list(tree_filehandle, sources, verbosity=0):
    """
    Takes a base tree and creates objects for each node and leaf, attaching them as 'data' dictionaries
    to each node in the DendroPy tree. Nodes and leaves with an OTT id also have pointers to their data
    dicts stored in an OTT-keyed dict, so that mappings to other databases (ncbi id, etc etc) can be created.

    We can easily have duplicate leaf names, so for the entire procedure we ignore the Dendropy concept
    of a taxon list and simply use labels.

    Parameters:
        tree_filehandle: open file object with a `name` attribute, holding the newick tree.
        sources: iterable of keys; each matched node gets a 'sources' dict with these keys set to None.
        verbosity: accepted but not used in this function.

    Returns the Dendropy tree and the OTT dict. Nodes labelled "<name> ott<N>" are keyed by the integer
    N; nodes labelled "<name> mrcaott<A>ott<B>" are keyed by that "mrcaott..." string.

    Exits the program (sys.exit) if the tree cannot be read.
    """
    indexed_by_ott = {}
    unsubstituted_warning = "Node has an @ sign at the end ({}), meaning it has probably not been substituted by an OpenTree equivalent. You may want to provide an alternative subtree from this node downwards, as otherwise it will probably be deleted from the main tree."

    try:
        tree = Tree.get_from_stream(tree_filehandle, schema="newick", preserve_underscores=True, suppress_leaf_node_taxa=True)
    except Exception:
        # Name the handle that was actually passed in.
        sys.exit("Problem reading tree from " + tree_filehandle.name)
    info("-> read tree from " + tree_filehandle.name)

    ott_node = re.compile(r"(.*) ott(\d+)(@\d*)?$") #matches the OTT number
    mrca_ott_node = re.compile(r"(.*) (mrcaott\d+ott\d+)(@\d*)?$") #matches a node with an "mrca" node number (no unique OTT)

    n_nodes = 0
    # Counting from 1 makes n_nodes the number of nodes visited.
    for n_nodes, node in enumerate(tree.preorder_node_iter(), 1):
        node.data = {'parent':node.parent_node or None}
        if not node.label:
            continue

        node.label = node.label.replace("_"," ")
        m = ott_node.search(node.label)
        if m is not None:
            if m.group(3):
                warn(unsubstituted_warning.format(node.label))
            node.label = m.group(1)
            node.data['ott'] = int(m.group(2))
            indexed_by_ott[node.data['ott']] = node.data
            node.data['sources'] = {k:None for k in sources}
        else:
            m = mrca_ott_node.search(node.label)
            if m is not None:
                if m.group(3):
                    warn(unsubstituted_warning.format(node.label))
                node.label = m.group(1)
                #this is an 'mrca' node, so we want to save sources but *not* save the ott number in node.data
                indexed_by_ott[m.group(2)] = node.data
                node.data['sources'] = {k:None for k in sources}
            elif node.is_leaf():
                warn("Leaf without an OTT id: '{}'. This will not be associated with any other data".format(node.label))

        #finally, put underscores at the start or the end of the new label back
        #as these denote "fake" names that are hidden and only used for mapping
        #we could keep them as spaces, but leading/trailing underscores are easier to see by eye
        #(startswith/endswith are safe when stripping the OTT suffix left an empty label)
        if node.label.startswith(" "):
            node.label = "_" + node.label[1:]
        if node.label.endswith(" "):
            node.label = node.label[:-1] + "_"

    info("-> extracted {} otts from among {} leaves and nodes".format(len(indexed_by_ott), n_nodes))
    return tree, indexed_by_ott
def main():
    """Print node labels side by side for matching RAxML trees in two directories.

    Command line: ``<dir1> <dir2>``. Each directory is searched for
    ``*/RAxML_bipartitions.bipart``. The sorted file lists must have equal
    length, and each pair must agree on ``tostr(tree)`` and on the name of
    its parent directory, otherwise an AssertionError is raised. A header
    line is printed first, then one line per pair of node labels.
    """
    d1, d2 = sys.argv[1], sys.argv[2]
    print('og {} {}'.format(basename(d1), basename(d2)))

    tail = ('*', 'RAxML_bipartitions.bipart')
    d1_files = sorted(glob(join(d1, *tail)))
    d2_files = sorted(glob(join(d2, *tail)))
    assert len(d1_files) == len(d2_files)

    for fn1, fn2 in zip(d1_files, d2_files):
        t1 = Tree.get(path=fn1, schema='newick')
        t2 = Tree.get(path=fn2, schema='newick')
        assert tostr(t1) == tostr(t2)

        t1_og = basename(dirname(fn1))
        t2_og = basename(dirname(fn2))
        assert t1_og == t2_og

        for l1, l2 in zip(get_node_labels(t1), get_node_labels(t2)):
            print(t1_og, l1, l2)
def evaluate(ref, file_name):
    """Return the symmetric difference between a reference tree and the
    tree that fastprot + fnj infer from ``file_name``.

    Parameters
    ----------
    ref : str
        Path to the reference tree in newick format.
    file_name : str
        Input file handed to ``fastprot``.

    Both trees are read with the taxon namespace ``tns``, which is defined
    outside this function.
    """
    # Two temporary files hold the intermediate results: the fastprot
    # output (dist) and the fnj tree (newick). They are removed again
    # before returning.
    dist_fd, dist_path = tempfile.mkstemp()
    try:
        # Only the path of the first file is needed; release its descriptor.
        os.close(dist_fd)
        tree_fd, tree_path = tempfile.mkstemp()
        try:
            # The tree is read back through the descriptor mkstemp opened;
            # the with-block closes it afterwards.
            with os.fdopen(tree_fd) as tree_stream:
                # Use the commands of fastprot and fnj.
                # The output of the FastPhylo programs is in the second file.
                os.system("fastprot -m -o " + dist_path + " " + file_name)
                os.system("fnj -O newick -m FNJ -o " + tree_path + " " + dist_path)
                # Use Dendropy to compare the trees.
                in_tree = Tree.get_from_stream(tree_stream, schema='newick', taxon_namespace=tns)
            ref_tree = Tree.get_from_path(ref, schema='newick', taxon_namespace=tns)
            return treecompare.symmetric_difference(ref_tree, in_tree)
        finally:
            _remove_if_present(tree_path)
    finally:
        _remove_if_present(dist_path)


def _remove_if_present(path):
    """Delete ``path``; cleanup is best effort, so a failure is ignored."""
    try:
        os.remove(path)
    except OSError:
        pass
def test_ben_bug(self):
    """Regression test for re-rooting one tree at the root position of another.

    After ``reroot_by_tree``, every leaf under the first child of the old
    tree's root must sit under the second child of the new tree's root and
    vice versa, and the number of leaves must equal that of the tree that
    was re-rooted.
    """
    # Fixture data: the tree to be re-rooted and the tree that supplies the root position.
    new_tree_newick = '(646366661:0.00571,(646777089:0.01427,(2556226606:0.0,2517129521:0.0):0.04312)0.377:0.01170,((650856936:0.01153,(((((646367708:0.01465,(638201361:0.00187,646622935:0.00573)0.940:0.01352)0.988:0.02634,(2519841469:0.06952,(650856136:0.01840,2506713669:0.02486)0.774:0.00888)0.893:0.01193)0.981:0.03778,((649738338:0.07504,(638155665:0.00613,648151945:0.00304)0.995:0.05973)0.884:0.02836,((650752390:0.11644,(2516847065:0.01707,2520801411:0.03442)0.993:0.04619)0.940:0.03278,(640592705:0.14347,637846211:0.11851)0.971:0.04593)0.940:0.03067)0.943:0.03401)0.998:0.06483,(638168675:0.17080,((649738388:0.09935,((((2540854716:0.00325,2553937573:0.00406)1.000:0.09930,(646533023:0.09868,640592823:0.06908)0.951:0.03770)1.000:0.07636,(650872422:0.05527,(650750471:0.05106,(2516847513:0.01440,2520803234:0.02517)0.998:0.05067)0.947:0.03589)0.074:0.01784)0.786:0.03804,(638155700:0.00445,648151981:0.00284)0.894:0.02131)0.995:0.08448)0.999:0.10690,((KYC55281.1:0.28954,(2540666849:0.26647,(2555938320:0.04589,2518907621:0.04631)0.970:0.05624)1.000:0.12340)0.993:0.09723,(((2515321874:0.26529,((637699780:0.01317,(2540563143:0.01361,(638165755:0.01099,638179449:0.01674)0.558:0.00611)0.964:0.01965)1.000:0.10518,(2502870849:0.06989,(648055573:0.14431,(646706666:0.11338,(637960147:0.03570,(2509663319:0.04930,(2519472088:0.03452,2515107634:0.07709)0.639:0.02134)0.957:0.03004)0.316:0.01755)0.809:0.02055)0.323:0.01213)0.991:0.07286)0.974:0.06071)0.997:0.09000,(650797088:0.06590,(639699575:0.03533,2512008957:0.12951)0.779:0.03900)1.000:0.21062)0.685:0.04985,((((640867801:0.08102,(2507462304:0.07476,(643570914:0.08474,((637897753:0.11959,((2509037835:0.14386,(648194984:0.08665,(648195418:0.04239,2506476786:0.04237)0.993:0.03477)0.668:0.01876)0.510:0.00983,((((640115295:0.06428,2540643958:0.01655)1.000:0.05043,2540643737:0.02645)0.502:0.01269,640115052:0.02482)0.987:0.04272,(2507147269:0.04181,2507146024:0.06962)0.615:0.01449)0.611:0.01456)0.992:0.03807)0.542:0.02193,((2525334810:0.02116,640099739:0.01544)0.785:0.00549,(640100248:0.00446,2525335778:0.02444)0.750:0.00227)1.000:0.12583)0.944:0.03489)0.962:0.04171)0.986:0.04499)0.793:0.02738,2509039570:0.05560)1.000:0.18840,(2505968448:0.05750,(2505971857:0.03133,2512783668:0.02305)0.185:0.01848)0.998:0.08344)0.875:0.04885,2518787893:0.16350)0.868:0.04436)1.000:0.14016)0.957:0.05998)0.998:0.08120)0.984:0.05479)0.999:0.07664,(2506713165:0.01408,((650917784:0.03595,640788680:0.07226)0.510:0.02178,(2519842728:0.03972,(646859549:0.04217,(2511672461:0.01672,(640786544:0.03901,(640793336:0.00334,(640165512:0.02037,641283602:0.00189)0.147:0.00210)0.175:0.00497)0.641:0.01093)0.991:0.02914)1.000:0.05940)0.323:0.02509)0.977:0.02843)0.960:0.02127)0.499:0.01743)0.998:0.03986,(638202197:0.00190,(644970377:0.01516,646623830:0.00752)0.678:0.00364)0.903:0.00939)0.626:0.01605);'
    old_tree_newick = '(((((646366661:0.00564,((2517129521:0,2556226606:0):0.04302,646777089:0.01412)0.499:0.01173)0.999:0.07494,(638202197:0.0019,(644970377:0.01507,646623830:0.0075)0.738:0.00362)0.872:0.00931)0.635:0.01598,650856936:0.01308)0.995:0.04,(2506713165:0.01171,((640788680:0.07255,650917784:0.03571)0.466:0.02189,(2519842728:0.03945,(646859549:0.04217,(2511672461:0.01668,(640786544:0.03894,(640793336:0.00335,(640165512:0.02038,641283602:0.0019)0.155:0.00211)0.174:0.00496)0.668:0.01095)0.987:0.02908)1.000:0.05973)0.285:0.02499)0.985:0.02844)0.967:0.02183)0.668:0.01914,(((646367708:0.01473,(638201361:0.00188,646622935:0.00578)0.947:0.01381)0.981:0.02381,(2519841469:0.06809,(650856136:0.0182,2506713669:0.02508)0.727:0.0089)0.915:0.01182)0.971:0.03729,((649738338:0.07412,(648151945:0.00339,638155665:0.00581)0.999:0.05834)0.847:0.02714,((650752390:0.11368,(2516847065:0.01707,2520801411:0.03449)0.997:0.04731)0.943:0.033,(640592705:0.14071,637846211:0.11886)0.974:0.04714)0.907:0.02899)0.938:0.03341)0.999:0.06584,(638168675:0.16751,((649738388:0.09339,((((2540854716:0.00327,2553937573:0.00411)1.000:0.1001,(640592823:0.06966,646533023:0.10017)0.945:0.03786)0.999:0.07732,(650872422:0.0565,(650750471:0.05131,(2516847513:0.01447,2520803234:0.02528)0.998:0.05086)0.932:0.03598)0.014:0.01864)0.823:0.03947,(648151981:0.00285,638155700:0.0045)0.891:0.01979)0.998:0.08667)0.999:0.11839,((2540666849:0.25749,(2518907621:0.04615,2555938320:0.04332)0.979:0.0624)1.000:0.20869,(((2515321874:0.27886,((637699780:0.01253,(2540563143:0.01408,(638165755:0.01119,638179449:0.01933)0.559:0.0062)0.973:0.01906)1.000:0.10181,(((2509663319:0.05476,(2515107634:0.07727,2519472088:0.03736)0.549:0.02018)0.970:0.02691,(637960147:0.03454,646706666:0.12022)0.328:0.01393)0.935:0.02456,(2502870849:0.07124,648055573:0.13944)0.419:0.01578)0.997:0.07265)0.976:0.05902)0.992:0.09155,(650797088:0.06773,(639699575:0.03844,2512008957:0.12921)0.700:0.03684)1.000:0.19382)0.774:0.0559,((2518787893:0.1617,(2512783668:0.01562,(2505971857:0.02991,2505968448:0.06931)0.820:0.01544)1.000:0.10132)0.000:0.03917,((640867801:0.07687,(2507462304:0.07769,(((637897753:0.11989,((2509037835:0.14075,(648194984:0.08661,(648195418:0.04254,2506476786:0.04232)0.986:0.0348)0.722:0.0191)0.553:0.00998,((((640115295:0.06424,2540643958:0.01643)1.000:0.05042,2540643737:0.02653)0.542:0.01245,640115052:0.0251)0.986:0.04265,(2507146024:0.06963,2507147269:0.0417)0.641:0.01435)0.611:0.01449)0.989:0.03824)0.424:0.02187,((640099739:0.01547,2525334810:0.02122)0.833:0.00545,(640100248:0.00445,2525335778:0.02442)0.761:0.0023)1.000:0.12528)0.944:0.03961,643570914:0.0938)0.959:0.03885)0.984:0.04627)0.758:0.02972,2509039570:0.05614)1.000:0.19872)0.185:0.04422)0.999:0.13795)0.646:0.06267)0.977:0.07517)0.979:0.05715);'
    old_tree = Tree.get(schema='newick', data=old_tree_newick)
    tree_to_reroot = Tree.get(schema='newick', data=new_tree_newick)
    r = Rerooter()
    reann = Reannotator()
    # Both trees go through reroot() before one is re-rooted by the other.
    new_tree = r.reroot_by_tree(
        r.reroot(old_tree),
        r.reroot(tree_to_reroot))
    # The expectations are read from old_tree only after the calls above.
    expected_lefts = old_tree.seed_node.child_nodes()[0].leaf_nodes()
    expected_rights = old_tree.seed_node.child_nodes()[1].leaf_nodes()
    # The two sides are expected in swapped positions in the new tree.
    for tip in expected_lefts:
        self.assertTrue(tip.taxon.label in [t.taxon.label for t in new_tree.seed_node.child_nodes()[1].leaf_nodes()])
    for tip in expected_rights:
        self.assertTrue(tip.taxon.label in [t.taxon.label for t in new_tree.seed_node.child_nodes()[0].leaf_nodes()])
    self.assertEqual(len(tree_to_reroot.leaf_nodes()), len(new_tree.leaf_nodes()))
def return_trees_from_trace(path):
    """Read a trace file and return ``[trees, lnls]``.

    Each line is expected to hold a log-likelihood followed by a newick
    tree. The likelihood is the text before the first "(" with any "[" and
    "]" characters removed.

    A tree is stored as a new entry when it is the first one or when its
    likelihood is greater than that of the previous line; otherwise it
    overwrites the most recent entry. After the last line, the final tree
    and its likelihood are appended once more. Likelihoods are returned as
    strings formatted with two decimals. An empty file yields ``[[], []]``.
    """
    print("Parsing trace: %s" % (path,))
    trees = []
    lnls = []
    last_tree = None
    last_lnl = 0.0
    lnl = None
    count_unique_trees = 0

    # Iterating the handle directly works on Python 2 and 3, and the
    # context manager closes the file even if a line fails to parse.
    with open(path, "r") as fin:
        for line in fin:
            lnlstring = line.partition("(")[0].replace("[", "").replace("]", "")
            lnl = float(lnlstring)

            t = Tree()
            # NOTE(review): the whole line, including the likelihood
            # prefix, is handed to the parser rather than only the part
            # from "(" onwards -- confirm this is intended.
            t.read_from_string(line, "newick")

            if last_tree is not None and not last_lnl < lnl:
                # No improvement: overwrite the most recent entry.
                trees[-1] = t
                lnls[-1] = "%.2f" % lnl
            else:
                # First tree, or a better likelihood than the previous line.
                trees.append(t)
                lnls.append("%.2f" % lnl)
                count_unique_trees += 1

            last_tree = t
            last_lnl = lnl
            print("%s %s" % (count_unique_trees, lnl))

    # NOTE(review): the final tree ends up in the list twice; kept as-is
    # because callers may rely on it -- confirm.
    if last_tree is not None:
        trees.append(last_tree)
        lnls.append("%.2f" % lnl)
    return [trees, lnls]
def __init__(self,ddpTree=None,tree_file=None,schema="newick",Tree_records=None):
    """Set up the instance from a tree object or from a tree file.

    Parameters
    ----------
    ddpTree : Tree, optional
        Tree to work on. It is stored as given (not copied) and is ignored
        when ``tree_file`` is supplied.
    tree_file : str, optional
        Path to a tree file, read with ``schema``; takes precedence over
        ``ddpTree``.
    schema : str
        Schema used to read ``tree_file``.
    Tree_records : list, optional
        Stored as given. When omitted, each instance gets a new empty list
        of its own instead of sharing one default list.
    """
    if tree_file:
        self.ddpTree = Tree.get_from_path(tree_file,schema)
    else:
        self.ddpTree = ddpTree

    self.Tree_records = [] if Tree_records is None else Tree_records
    self.opt_score = None
    self.opt_root = self.ddpTree.seed_node
    self.opt_x = 0
def assert_tree_equal_no_labels_deprecated(self, expected_newick, observed_tree):
    """Assert that two trees have equal string forms once the labels of
    their non-leaf nodes are cleared.

    ``expected_newick`` is parsed as newick. ``observed_tree`` is modified
    in place: its internal node labels are set to None.
    """
    expected = Tree.get(schema='newick', data=expected_newick)
    for tree in (expected, observed_tree):
        for node in tree.nodes():
            if not node.is_leaf():
                node.label = None
    self.assertEqual(str(expected), str(observed_tree))
def test_ben_bug(self):
    """Regression test for re-rooting one tree at the root position of another.

    After ``reroot_by_tree``, every leaf under the first child of the old
    tree's root must sit under the second child of the new tree's root and
    vice versa, and the number of leaves must equal that of the tree that
    was re-rooted.
    """
    # Fixture data: the tree to be re-rooted and the tree that supplies the root position.
    new_tree_newick = u'(646366661:0.00571,(646777089:0.01427,(2556226606:0.0,2517129521:0.0):0.04312)0.377:0.01170,((650856936:0.01153,(((((646367708:0.01465,(638201361:0.00187,646622935:0.00573)0.940:0.01352)0.988:0.02634,(2519841469:0.06952,(650856136:0.01840,2506713669:0.02486)0.774:0.00888)0.893:0.01193)0.981:0.03778,((649738338:0.07504,(638155665:0.00613,648151945:0.00304)0.995:0.05973)0.884:0.02836,((650752390:0.11644,(2516847065:0.01707,2520801411:0.03442)0.993:0.04619)0.940:0.03278,(640592705:0.14347,637846211:0.11851)0.971:0.04593)0.940:0.03067)0.943:0.03401)0.998:0.06483,(638168675:0.17080,((649738388:0.09935,((((2540854716:0.00325,2553937573:0.00406)1.000:0.09930,(646533023:0.09868,640592823:0.06908)0.951:0.03770)1.000:0.07636,(650872422:0.05527,(650750471:0.05106,(2516847513:0.01440,2520803234:0.02517)0.998:0.05067)0.947:0.03589)0.074:0.01784)0.786:0.03804,(638155700:0.00445,648151981:0.00284)0.894:0.02131)0.995:0.08448)0.999:0.10690,((KYC55281.1:0.28954,(2540666849:0.26647,(2555938320:0.04589,2518907621:0.04631)0.970:0.05624)1.000:0.12340)0.993:0.09723,(((2515321874:0.26529,((637699780:0.01317,(2540563143:0.01361,(638165755:0.01099,638179449:0.01674)0.558:0.00611)0.964:0.01965)1.000:0.10518,(2502870849:0.06989,(648055573:0.14431,(646706666:0.11338,(637960147:0.03570,(2509663319:0.04930,(2519472088:0.03452,2515107634:0.07709)0.639:0.02134)0.957:0.03004)0.316:0.01755)0.809:0.02055)0.323:0.01213)0.991:0.07286)0.974:0.06071)0.997:0.09000,(650797088:0.06590,(639699575:0.03533,2512008957:0.12951)0.779:0.03900)1.000:0.21062)0.685:0.04985,((((640867801:0.08102,(2507462304:0.07476,(643570914:0.08474,((637897753:0.11959,((2509037835:0.14386,(648194984:0.08665,(648195418:0.04239,2506476786:0.04237)0.993:0.03477)0.668:0.01876)0.510:0.00983,((((640115295:0.06428,2540643958:0.01655)1.000:0.05043,2540643737:0.02645)0.502:0.01269,640115052:0.02482)0.987:0.04272,(2507147269:0.04181,2507146024:0.06962)0.615:0.01449)0.611:0.01456)0.992:0.03807)0.542:0.02193,((2525334810:0.02116,640099739:0.01544)0.785:0.00549,(640100248:0.00446,2525335778:0.02444)0.750:0.00227)1.000:0.12583)0.944:0.03489)0.962:0.04171)0.986:0.04499)0.793:0.02738,2509039570:0.05560)1.000:0.18840,(2505968448:0.05750,(2505971857:0.03133,2512783668:0.02305)0.185:0.01848)0.998:0.08344)0.875:0.04885,2518787893:0.16350)0.868:0.04436)1.000:0.14016)0.957:0.05998)0.998:0.08120)0.984:0.05479)0.999:0.07664,(2506713165:0.01408,((650917784:0.03595,640788680:0.07226)0.510:0.02178,(2519842728:0.03972,(646859549:0.04217,(2511672461:0.01672,(640786544:0.03901,(640793336:0.00334,(640165512:0.02037,641283602:0.00189)0.147:0.00210)0.175:0.00497)0.641:0.01093)0.991:0.02914)1.000:0.05940)0.323:0.02509)0.977:0.02843)0.960:0.02127)0.499:0.01743)0.998:0.03986,(638202197:0.00190,(644970377:0.01516,646623830:0.00752)0.678:0.00364)0.903:0.00939)0.626:0.01605);'
    old_tree_newick = u'(((((646366661:0.00564,((2517129521:0,2556226606:0):0.04302,646777089:0.01412)0.499:0.01173)0.999:0.07494,(638202197:0.0019,(644970377:0.01507,646623830:0.0075)0.738:0.00362)0.872:0.00931)0.635:0.01598,650856936:0.01308)0.995:0.04,(2506713165:0.01171,((640788680:0.07255,650917784:0.03571)0.466:0.02189,(2519842728:0.03945,(646859549:0.04217,(2511672461:0.01668,(640786544:0.03894,(640793336:0.00335,(640165512:0.02038,641283602:0.0019)0.155:0.00211)0.174:0.00496)0.668:0.01095)0.987:0.02908)1.000:0.05973)0.285:0.02499)0.985:0.02844)0.967:0.02183)0.668:0.01914,(((646367708:0.01473,(638201361:0.00188,646622935:0.00578)0.947:0.01381)0.981:0.02381,(2519841469:0.06809,(650856136:0.0182,2506713669:0.02508)0.727:0.0089)0.915:0.01182)0.971:0.03729,((649738338:0.07412,(648151945:0.00339,638155665:0.00581)0.999:0.05834)0.847:0.02714,((650752390:0.11368,(2516847065:0.01707,2520801411:0.03449)0.997:0.04731)0.943:0.033,(640592705:0.14071,637846211:0.11886)0.974:0.04714)0.907:0.02899)0.938:0.03341)0.999:0.06584,(638168675:0.16751,((649738388:0.09339,((((2540854716:0.00327,2553937573:0.00411)1.000:0.1001,(640592823:0.06966,646533023:0.10017)0.945:0.03786)0.999:0.07732,(650872422:0.0565,(650750471:0.05131,(2516847513:0.01447,2520803234:0.02528)0.998:0.05086)0.932:0.03598)0.014:0.01864)0.823:0.03947,(648151981:0.00285,638155700:0.0045)0.891:0.01979)0.998:0.08667)0.999:0.11839,((2540666849:0.25749,(2518907621:0.04615,2555938320:0.04332)0.979:0.0624)1.000:0.20869,(((2515321874:0.27886,((637699780:0.01253,(2540563143:0.01408,(638165755:0.01119,638179449:0.01933)0.559:0.0062)0.973:0.01906)1.000:0.10181,(((2509663319:0.05476,(2515107634:0.07727,2519472088:0.03736)0.549:0.02018)0.970:0.02691,(637960147:0.03454,646706666:0.12022)0.328:0.01393)0.935:0.02456,(2502870849:0.07124,648055573:0.13944)0.419:0.01578)0.997:0.07265)0.976:0.05902)0.992:0.09155,(650797088:0.06773,(639699575:0.03844,2512008957:0.12921)0.700:0.03684)1.000:0.19382)0.774:0.0559,((2518787893:0.1617,(2512783668:0.01562,(2505971857:0.02991,2505968448:0.06931)0.820:0.01544)1.000:0.10132)0.000:0.03917,((640867801:0.07687,(2507462304:0.07769,(((637897753:0.11989,((2509037835:0.14075,(648194984:0.08661,(648195418:0.04254,2506476786:0.04232)0.986:0.0348)0.722:0.0191)0.553:0.00998,((((640115295:0.06424,2540643958:0.01643)1.000:0.05042,2540643737:0.02653)0.542:0.01245,640115052:0.0251)0.986:0.04265,(2507146024:0.06963,2507147269:0.0417)0.641:0.01435)0.611:0.01449)0.989:0.03824)0.424:0.02187,((640099739:0.01547,2525334810:0.02122)0.833:0.00545,(640100248:0.00445,2525335778:0.02442)0.761:0.0023)1.000:0.12528)0.944:0.03961,643570914:0.0938)0.959:0.03885)0.984:0.04627)0.758:0.02972,2509039570:0.05614)1.000:0.19872)0.185:0.04422)0.999:0.13795)0.646:0.06267)0.977:0.07517)0.979:0.05715);'
    old_tree = Tree.get(schema='newick', data=old_tree_newick)
    tree_to_reroot = Tree.get(schema='newick', data=new_tree_newick)
    r = Rerooter()
    reann = Reannotator()
    # Both trees go through reroot() before one is re-rooted by the other.
    new_tree = r.reroot_by_tree(
        r.reroot(old_tree),
        r.reroot(tree_to_reroot))
    # The expectations are read from old_tree only after the calls above.
    expected_lefts = old_tree.seed_node.child_nodes()[0].leaf_nodes()
    expected_rights = old_tree.seed_node.child_nodes()[1].leaf_nodes()
    # The two sides are expected in swapped positions in the new tree.
    for tip in expected_lefts:
        self.assertTrue(tip.taxon.label in [t.taxon.label for t in new_tree.seed_node.child_nodes()[1].leaf_nodes()])
    for tip in expected_rights:
        self.assertTrue(tip.taxon.label in [t.taxon.label for t in new_tree.seed_node.child_nodes()[0].leaf_nodes()])
    self.assertEqual(len(tree_to_reroot.leaf_nodes()), len(new_tree.leaf_nodes()))
def run_tact(script_runner, datadir, stem):
    """Run the TACT command-line tools on one dataset and load the trees.

    ``tact_add_taxa`` is run on ``<stem>.taxonomy.tre`` and
    ``<stem>.backbone.tre`` from ``datadir``; the Newick file it produces is
    printed to stderr as an ASCII plot and then passed to
    ``tact_check_results``.  Both commands must exit with status 0.

    Returns a ``(tacted, taxed, bbone)`` tuple: the generated tree, the
    taxonomy tree and the backbone tree.
    """
    taxonomy = os.path.join(datadir, stem + ".taxonomy.tre")
    backbone = os.path.join(datadir, stem + ".backbone.tre")
    out_prefix = ".tact-pytest-" + stem

    taxed = Tree.get(path=taxonomy, schema="newick")
    bbone = Tree.get(path=backbone, schema="newick")

    add_taxa_args = [
        "tact_add_taxa",
        "--taxonomy", taxonomy,
        "--backbone", backbone,
        "--output", out_prefix,
        "-vv",
    ]
    result = script_runner.run(*add_taxa_args)
    assert result.returncode == 0

    output = out_prefix + ".newick.tre"
    tacted = Tree.get(path=output, schema="newick")
    sys.stderr.write(tacted.as_ascii_plot())

    check_args = [
        "tact_check_results", output,
        "--taxonomy", taxonomy,
        "--backbone", backbone,
        "--output", out_prefix + ".check.csv",
        "--cores=1",
    ]
    result = script_runner.run(*check_args)
    assert result.returncode == 0
    return (tacted, taxed, bbone)
def pretty_print_trees():
    """Rewrite each species' RAxML tree with annotated taxon labels.

    For every species in ``species_trna_seq`` (in sorted order) that has a
    ``RAxML_result.<species>`` file under ``RAXMLDIR``, every occurrence of
    each taxon label in the tree string is extended with:

    * ``(N)`` when the label has entries in ``species_trna_dups``, where
      ``N`` is the number of recorded duplicates plus one;
    * ``[M]`` where ``M`` is the ``count_trna_types`` count for the label's
      entry in ``species_trna_mtrip``;
    * ``***`` when the label is listed in ``species_switchedtrnas``.

    The annotated string is written to ``TREEDIR/<species>.tree``.
    """
    # print() with a single pre-formatted argument behaves the same on
    # Python 2 and Python 3.
    print("\n. OK, I'm reformatting the RAxML results for nice printing...")
    for species in sorted(species_trna_seq):
        treepath = RAXMLDIR + "/RAxML_result." + species
        if not os.path.exists(treepath):
            continue
        newtreepath = TREEDIR + "/" + species + ".tree"

        t = Tree()
        t.read_from_path(treepath, "newick")
        print(" --> %s" % treepath)

        trna_count = count_trna_types(species)
        newts = str(t)
        for taxon in t.taxon_set:
            label = taxon.label
            thisac = species_trna_mtrip[species][label]
            count_this_type = trna_count[thisac]

            count_dups = 0
            if label in species_trna_dups[species]:
                count_dups = len(species_trna_dups[species][label]) + 1
            dup_tag = "(" + str(count_dups) + ")" if count_dups > 1 else ""

            mark = ""
            if species in species_switchedtrnas:
                print("534: %s" % (species_switchedtrnas[species],))
                if label in species_switchedtrnas[species]:
                    mark = "***"

            decorated = label + dup_tag + "[" + str(count_this_type) + "]" + mark
            # Literal replacement: the label is data, not a regular
            # expression, so characters such as '.', '(' or '+' in a label
            # must not be interpreted as regex syntax.
            newts = newts.replace(label, decorated)

        # The context manager closes the file even if the write fails.
        with open(newtreepath, "w") as fout:
            fout.write(newts + "\n")
def write_and_read_nexus(filename, header, tree_id, tree_str):
    """Write a one-tree NEXUS file and load it back as a DendroPy tree.

    The file consists of the ``header`` lines followed by a single
    ``tree <tree_id> <tree_str>`` line, each terminated by a newline.  It is
    then parsed with a fresh case-sensitive taxon namespace, keeping taxa on
    internal nodes.
    """
    namespace = TaxonNamespace(is_case_sensitive=True)

    # Write a temporary file containing the tree.
    tree_line = "tree " + tree_id + " " + tree_str
    with open(filename, "w") as handle:
        handle.writelines(line + "\n" for line in header + [tree_line])

    # Read the tree back as a DendroPy tree.
    return Tree.get(
        path=filename,
        schema="nexus",
        taxon_namespace=namespace,
        case_sensitive_taxon_labels=True,
        suppress_internal_node_taxa=False,
    )
def main():
    """Load the Newick tree named by the first command-line argument and
    print every item produced by ``resolve_tree`` on its own line."""
    from sys import argv

    tree = Tree.get_from_path(argv[1], "newick")
    for resolved in resolve_tree(tree):
        print(resolved)
def main(OT_filehandle, OTTs_to_keep, outfile):
    """Prune a Newick tree down to the lineages leading to selected nodes.

    The tree is read from ``OT_filehandle`` and walked in post-order, so
    every child is visited before its parent.  A node is kept when
    ``node_label_in(node, OTTs_to_keep)`` accepts it or when one of its
    children was kept (recorded as a ``keep`` attribute on the parent);
    every other node is detached from its parent.  The pruned tree is
    written to ``outfile`` in Newick format.

    If nothing at all is kept, the root itself is left in place: it has no
    parent to be removed from, and attempting to do so used to raise
    ``AttributeError``.
    """
    # Read in the tree, but don't create taxa for the leaves (faster).
    tree = Tree.get(stream=OT_filehandle, schema="newick", suppress_leaf_node_taxa=True)
    for node in tree.postorder_node_iter():
        parent = node.parent_node
        if hasattr(node, 'keep') or node_label_in(node, OTTs_to_keep):
            if parent:  # not the root: propagate the keep flag upwards
                parent.keep = True
        elif parent is not None:
            parent.remove_child(node, suppress_unifurcations=False)
    tree.write(file=outfile, schema='newick', suppress_leaf_node_labels=False)
def compute_tree_distances(con):
    """Compute and store all pairwise distances between the stored ML trees.

    Registers the two metrics (1 = 'symmetric', 2 = 'euclidean') in
    ``TreeDistanceMetrics``, parses every Newick string found in
    ``UnsupportedMlPhylogenies`` and, for every ordered pair of trees
    (including a tree with itself), inserts the symmetric difference and
    the Euclidean distance into ``TreeDistances``.

    Parameters
    ----------
    con : database connection
        NOTE(review): the SQL uses SQLite's ``insert or replace`` and the
        inserts use ``?`` placeholders, so this presumably is a ``sqlite3``
        connection -- confirm against the caller.
    """
    cur = con.cursor()
    cur.execute("insert or replace into TreeDistanceMetrics(metricid, name) values(1, 'symmetric')")
    cur.execute("insert or replace into TreeDistanceMetrics(metricid, name) values(2, 'euclidean')")
    con.commit()

    # The query must run on the cursor the rows are fetched from; running it
    # on the connection left this cursor without a result set.
    cur.execute("select id, almethod, phylomodelid, newick from UnsupportedMlPhylogenies")
    treeid_dendropytree = {}
    for row in cur.fetchall():
        treeid = row[0]
        newick = row[3]
        t = Tree()
        t.read_from_string(newick, "newick")
        treeid_dendropytree[treeid] = t

    # Bound parameters instead of string-built SQL.
    insert_sql = "insert into TreeDistances(metricid, treeida, treeidb, distance) values(?, ?, ?, ?)"
    for ii, treeii in treeid_dendropytree.items():
        for jj, treejj in treeid_dendropytree.items():
            # Metric 1: symmetric distance.
            cur.execute(insert_sql, (1, ii, jj, treeii.symmetric_difference(treejj)))
            # Metric 2: Euclidean distance.
            cur.execute(insert_sql, (2, ii, jj, treeii.euclidean_distance(treejj)))
    con.commit()
def get_bipart(ts, species):
    """Encode the split below the node labelled ``#1`` as a string.

    ``ts`` is parsed as Newick.  For each name in ``species`` (in order) the
    result holds ``'1'`` if the name is a leaf under the ``#1`` node,
    ``'0'`` if it is some other leaf of the tree, and ``'?'`` if it is not
    in the tree.  The string and its ``neg``-transformed counterpart are
    compared and the smaller one is returned.
    """
    tree = Tree.get(data=ts, schema='newick')
    marked = tree.find_node(lambda node: node.label == '#1')
    inside = {leaf.taxon.label for leaf in marked.leaf_iter()}
    present = {leaf.taxon.label for leaf in tree.leaf_node_iter()}

    def encode(name):
        if name in inside:
            return '1'
        return '0' if name in present else '?'

    b1 = ''.join(encode(name) for name in species)
    b2 = ''.join(neg(symbol) for symbol in b1)
    assert b1 != b2
    assert len(b1) == len(b2) and len(b1) == len(species)
    return min(b1, b2)
def assert_tree_equal_no_labels(self, expected_newick, observed_tree):
    '''Assert that two trees are equal once internal labels are ignored.

    ``expected_newick`` is parsed as a force-rooted Newick tree.  On both
    trees every internal node label is cleared and missing internal edge
    lengths are set to 0.0; the trees are then passed through
    ``self.sort_tree`` and their string forms compared.

    Should include some tree ordering because ordering of children is not
    relevant, but eh for now.
    '''
    expected = Tree.get(data=expected_newick, schema='newick', rooting='force-rooted')

    for tree in (expected, observed_tree):
        for node in tree.internal_nodes():
            node.label = None
            if node.edge.length is None:
                node.edge.length = 0.0
        self.sort_tree(tree)

    expected_str = str(self.sort_tree(expected))
    observed_str = str(self.sort_tree(observed_tree))
    self.assertEqual(expected_str, observed_str)
def root_tree(f_name, out):
    """Midpoint-root the Newick tree in ``f_name`` and write it to ``out``.

    The tree is read as force-rooted, rerooted at its midpoint and written
    in Newick format without the rooting token, with real values formatted
    as ``12.8f``.
    """
    t = Tree.get(path=f_name, schema="newick", rooting='force-rooted')
    t.reroot_at_midpoint()
    # ``write(path=...)`` is given the destination path directly, so no
    # separate handle on ``out`` is opened here; the extra ``open(out, "w+")``
    # was never written to and leaked if the write raised.
    t.write(path=out, schema="newick", suppress_rooting=True, real_value_format_specifier="12.8f")
def test_write_fasttree_newick(self):
    """Check the cleaned Newick output for a range of label styles."""
    tc = DendropyTreeCleaner()
    cases = [
        # Plain labels; the root label does not appear in the output.
        ("((a,b),(d,e))root;",
         "((a,b),(d,e));\n"),
        # Internal labels should be removed.
        ("((a_2,b)c,(d,e)f)root;",
         "((a_2,b),(d,e));\n"),
        # Quoted spaces should become underscores.
        ("(('a 2',b),(d,e))root;",
         "((a_2,b),(d,e));\n"),
        # Test underscores that are quoted.
        ("(('a_2',b),(d,e))root;",
         "((a_2,b),(d,e));\n"),
        # Test dashes
        ("((ANME-2dV10_01644,b),(d,e))root;",
         "((ANME-2dV10_01644,b),(d,e));\n"),
        # A more real world example with '~' characters (which never
        # mattered actually).
        ("('Asulf_Archaeoglobus.1_2280~2522125074':7.17,(('Afulgi_764~2528311132':0.0,'CP006577_764~2588253768':0.0):0.0,'AE000782_746~638154502':0.0)'s__Archaeoglobus fulgidus':7.555):1.461;\n",
         "(Asulf_Archaeoglobus.1_2280~2522125074:7.17,((Afulgi_764~2528311132:0.0,CP006577_764~2588253768:0.0):0.0,AE000782_746~638154502:0.0):7.555):1.461;\n"),
    ]
    for newick, expected in cases:
        tree = Tree.get(data=newick, schema='newick')
        self.assertEqual(expected, self.clean(tc, tree))
def test_reroot_trifurcated_tree_at_longest_child(self):
    """Reroot three trifurcated trees and compare with the expected trees.

    In each input a different child of the root carries the longest branch
    (0.5); each expected tree has that branch split into two 0.25 halves
    around the new root.
    """
    cases = [
        (u'(A:0.1,B:0.2,(C:0.3,D:0.4):0.5);',
         u"((C:0.3,D:0.4):0.25,(A:0.1,B:0.2):0.25);"),
        (u'(A:0.5,B:0.2,(C:0.3,D:0.4):0.1);',
         u"(A:0.25,(B:0.2,(C:0.3,D:0.4):0.1):0.25);"),
        (u'(A:0.2,B:0.5,(C:0.3,D:0.4):0.1);',
         u"(B:0.25,(A:0.2,(C:0.3,D:0.4):0.1):0.25);"),
    ]
    inputs = [Tree.get(schema='newick', data=source) for source, _ in cases]
    expected = [str(Tree.get(schema='newick', data=wanted)) for _, wanted in cases]
    rerooted = [str(Rerooter().reroot(tree)).strip() for tree in inputs]

    for observed, wanted in zip(rerooted, expected):
        self.assertEqual(observed, wanted)
def runProgram(referenceTreeFile, sampleTreeList, bootstrap_cutoff_value=80, output_tree="output_tree.tre", verbose=False, quiet=False, timing=False):
    """Label a reference tree with quartet support from a set of sample trees.

    Parameters
    ----------
    referenceTreeFile : path of the reference tree (Newick).
    sampleTreeList : passed to ``readTrees`` to load the sample trees.
    bootstrap_cutoff_value : passed through to ``buildFullSupport``.
    output_tree : passed through to ``buildLabeledTree``.
    verbose : print the inputs and the quartet dictionary.
    quiet, timing : passed through to the helpers; ``timing`` also turns
        ``verbose`` off after the initial input summary has been printed.

    If the reference tree cannot be read, an error message is printed and
    the process exits via ``sys.exit()``.  If a sample tree contains taxon
    labels missing from the reference tree's namespace, an error message is
    printed and the function returns without writing anything.
    """
    if verbose:
        print("Reference Tree: ", referenceTreeFile)
        print("Sample Tree List: ", sampleTreeList)
        print("Bootstrap Cutoff Value: ", bootstrap_cutoff_value)
        print("Output Tree File: ", output_tree)
    if timing:
        verbose = False

    try:
        reference_tree = Tree.get(path=referenceTreeFile, schema="newick", preserve_underscores=True)
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that Ctrl-C and
        # SystemExit are not reported as a file-format problem.
        print("Error with file '{}': please only use files with newick tree format".format(referenceTreeFile))
        sys.exit()

    reference_tree_namespace = reference_tree.taxon_namespace
    sample_tree_list = readTrees(sampleTreeList, reference_tree_namespace, quiet)

    # Check if gene tree taxon namespace matches reference tree
    for s in sample_tree_list:
        if not reference_tree_namespace.has_taxa_labels(s.taxon_namespace.labels()):
            print('Error: reference tree is of a different taxon namespace as the sample trees')
            return

    full_quartet_dictionary = buildFullSupport(sample_tree_list, bootstrap_cutoff_value, verbose, quiet, timing)
    if verbose:
        print("Full quartet dictionary with support values")
        for quartet in full_quartet_dictionary:
            print(quartet, full_quartet_dictionary[quartet])
        print()

    buildLabeledTree(referenceTreeFile, full_quartet_dictionary, output_tree, quiet, timing)
def recom_resultFig_dm(recom_prob, mixtureProb):
    """Build a site-by-node 0/1 matrix from posteriors and plot it per node.

    Parameters
    ----------
    recom_prob :
        Indexed as ``recom_prob[column][i]`` with the columns
        ``'recom_nodes'``, ``'target_node'`` and ``'posterior'``.
        NOTE(review): presumably a pandas DataFrame with one row per
        candidate event -- confirm against the caller.
    mixtureProb :
        Threshold compared against ``posterior[i][site][1]``.

    Returns
    -------
    The ``(alignment_len, nodes_number)`` array of zeros and ones.

    Relies on the module-level names ``alignment_len``, ``nodes_number``,
    ``tips_num``, ``tree_path`` and ``alignment``.  The figure is saved to
    ``PhyloHMM_Recombination_two.jpeg``.
    """
    output = np.zeros((alignment_len, nodes_number))
    for i in range(len(recom_prob)):
        if (recom_prob['recom_nodes'][i] < tips_num):
            # Node index below tips_num: mark every site whose posterior
            # reaches the threshold in that node's column.
            for j in range(alignment_len):
                if (recom_prob['posterior'][i][j][1] >= mixtureProb):
                    output[j, recom_prob['recom_nodes'][i]] = 1
        else:
            # for j in range(alignment_len):
            #     if (recom_prob['posterior'][i][j][1] >= mixtureProb):
            #         output[j, recom_prob['target_node'][i]] = 1
            # Otherwise look for a later row j that mirrors row i (recom and
            # target nodes swapped) and mark a site only when both rows
            # reach the threshold there.
            for j in range(i + 1, len(recom_prob)):
                if (recom_prob['recom_nodes'][i] == recom_prob['target_node'][j]) and (recom_prob['recom_nodes'][j] == recom_prob['target_node'][i]):
                    for k in range(alignment_len):
                        if ((recom_prob['posterior'][i][k][1] >= mixtureProb) and (recom_prob['posterior'][j][k][1] >= mixtureProb)):
                            output[k, recom_prob['target_node'][i]] = 1
                        # if (recom_prob['posterior'][i][k] < recom_prob['posterior'][j][k]):
                        #     recom_prob['posterior'][i][k] = recom_prob['posterior'][j][k]
                        # if (recom_prob['posterior'][i][k] >= mixtureProb):
                        #     output[k, recom_prob['target_node'][i]] = 1
    # One stacked subplot per node; the figure size scales with tips_num.
    fig = plt.figure(figsize=(tips_num + 9, tips_num / 2))
    color = ['red', 'green', 'purple', 'blue', 'black']
    clonaltree = Tree.get_from_path(tree_path, 'newick')
    set_index(clonaltree, alignment)
    for i in range(nodes_number):
        ax = fig.add_subplot(nodes_number, 1, i + 1)
        if i >= tips_num:
            # Index at or above tips_num: label the curve with the result of
            # give_descendents for this node.
            desc = set()
            d = give_descendents(clonaltree, i, desc)
            ax.plot(output[:, i], label=str(i) + ' is mrca:' + str(d), color=color[i % 5])
        else:
            ax.plot(output[:, i], label=give_taxon(clonaltree, i), color=color[i % 5])
        ax.legend(bbox_to_anchor=(0.045, 1.5), prop={'size': 10})
        ax.set_frame_on(False)
        # NOTE(review): 'off' is immediately followed by 'on' -- confirm
        # that this pair is intentional.
        ax.axis('off')
        ax.axis('on')
        ax.set_yticklabels([])
    plt.savefig("PhyloHMM_Recombination_two.jpeg")
    # plt.show()
    return output
def test_joel_bug(self):
    """Reroot the 70-OTU tree by the 67-OTU tree and check the root split.

    Every tip under the first child of the old root must appear under the
    second child of the new root, and every tip under the second child of
    the old root under the first child of the new root.  Rerooting must
    also leave the number of leaves of the rerooted tree unchanged.
    """
    tree67 = u'''[ Thu Sep 10 15:55:28 2015: Loaded from /srv/projects/graftm/testing_files/testing_graftM/tmp_01_decorate/67_otus.tree Thu Sep 10 15:56:18 2015: tree_67_otus saved to /srv/projects/graftm/testing_files/testing_graftM/tmp_01_decorate/67_otus.rerooted.tree ] ((((1928988:0.10866,2909029:0.15809):0.03546,((801940:0.10703,(3825327:0.12686,4298210:0.09398):0.07480):0.02560,729293:0.21465):0.01982):0.02058,((426860:0.16275,219508:0.12556):0.02403,((1128285:0.06200,4455990:0.07954):0.07525,(815912:0.12348,(3770699:0.23707,823009:0.09955):0.04225):0.01489):0.01849):0.01531):0.09184,(((2361381:0.22741,(3779572:0.06720,4363260:0.07438):0.01460):0.04187,(((((((734152:0.13251,4091454:0.12251):0.03552,((576962:0.14097,(1145804:0.14124,3106714:0.14895):0.01964):0.01668,(2014493:0.15560,(3192744:0.11018,(202294:0.07263,1138804:0.08032):0.05015):0.01277):0.01187):0.01016):0.01486,4323734:0.15004):0.00053,(759363:0.05430,4459468:0.04835):0.03216):0.01531,4322265:0.12041):0.01024,(4391683:0.11058,(229854:0.07735,(4336814:0.09937,((150571:0.07911,2730777:0.10930):0.04404,((4042859:0.25381,(717487:0.13914,4363563:0.19585):0.02281):0.02587,(((3190878:0.16480,4452949:0.07312):0.05029,(4015030:0.10339,(4438491:0.04779,(2286116:0.08699,(4251079:0.03657,4349225:0.02256):0.01189):0.01091):0.04963):0.01748):0.02917,(3014179:0.16455,(2170497:0.16101,(2107103:0.22406,951205:0.11633):0.02436):0.02574):0.03041):0.01561):0.02862):0.02589):0.01914):0.01811):0.01347):0.01451,((182569:0.14758,4363259:0.07793):0.04894,696036:0.14901):0.01514):0.01624):0.02659,(3761685:0.11278,4423155:0.16503):0.03965):0.09184); '''
    tree70 = (
        u'((4423550:0.17275,((4091454:0.108,4427993:0.1045)50:0.01575,((123662:0.06599,(3269889:0.12737,(104534:0.06041,734152:0.09136)20:0.00526)80:0.01669)90:0.01398,(300695:0.10755,225636:0.1317)100:0.0405)0:0.01073)40:0.0128)20:0.00782,(4377103:0.09243,((172946:0.08097,1145804:0.08645)100:0.02986,(1941303:0.0953,4332975:0.09505)90:0.00838)100:0.02206)90:0.0272,((((1931714:0.07012,(4322265:0.10071,4343117:0.13235)100:0.01842)100:0.03116,(((759363:0.05402,4459468:0.0433)100:0.02405,(294612:0.14484,2679839:0.1009)90:0.02132)70:0.01331,((((((730039:0.15444,((4015030:0.11176,(4438491:0.04568,(4349225:0.02406,(2286116:0.08501,(4251079:0.02026,4386156:0.01582)80:0.01016)40:0.0097)80:0.0168)100:0.03826)50:0.01397,(4308961:0.10766,4452949:0.05355)90:0.06215)40:0.01455)50:0.01325,(((1718272:0.12738,(150571:0.08502,(699249:0.03117,2730777:0.03253)100:0.06302)70:0.02174)60:0.03847,(((2107103:0.20025,3190878:0.14435)40:0.03601,(1824285:0.10892,3014179:0.14706)30:0.02039)0:0.01309,((3366304:0.09202,951205:0.07509)100:0.05732,2170497:0.16332)90:0.02722)10:0.01937)0:0.01868,(3064426:0.20791,((1837676:0.14477,(4363563:0.14803,4479774:0.10823)90:0.04638)90:0.03766,(4042859:0.2295,717487:0.15674)40:0.01749)20:0.01416)0:0.01063)0:0.03387)100:0.04795,4336814:0.08037)0:0.02958,(346735:0.11193,4391683:0.07639)60:0.00894)0:0.01312,1142178:0.07594)0:0.01881,(229854:0.0646,4460175:0.09289)90:0.02422)20:0.01731)0:0.01339)0:0.00777,(((2984017:0.05634,4340384:0.07722)80:0.03016,(((4371218:0.13005,(1133483:0.08797,3106714:0.09717)90:0.02053)80:0.02174,(3256066:0.08328,4022282:0.11841)90:0.03619)100:0.03392,((202294:0.06795,1138804:0.07777)100:0.05296,(3192744:0.09608,(2014493:0.11684,(180127:0.06532,4417185:0.0713)100:0.03824)100:0.0368)40:0.01663)70:0.00787)50:0.01733)10:0.0083,(222095:0.1391,(288404:0.13004,(4323734:0.07601,4446882:0.06844)60:0.01661)100:0.02863)40:0.01639)0:0.00846)0:0.0135,(((((1133369:0.07769,4336154:0.07979)100:0.11778,(((708774:0.0822,((114724:0.047,82092:0.04936)100:0.11526,'
        u'(201206:0.10329,4423155:0.14181)60:0.03138)40:0.01886)80:0.03209,(202302:0.11673,3761685:0.09059)100:0.02325)90:0.02946,(((576962:0.11188,202459:0.09918)90:0.033,(213358:0.0989,(3390949:0.09853,3726184:0.09836)90:0.03298)90:0.02315)20:0.01425,202949:0.15903)0:0.01188)20:0.02709)10:0.01609,((4323100:0.0982,4409929:0.10612)60:0.01386,((696036:0.11283,(203529:0.18615,202449:0.08377)10:0.02209)30:0.02916,((2361381:0.18808,203220:0.10905)100:0.04166,(4363260:0.07208,(3779572:0.04977,114015:0.13268)70:0.02151)70:0.01055)100:0.04229)0:0.01717)0:0.01634)0:0.00519,(539547:0.12233,(4409453:0.14784,(4363259:0.05689,((268769:0.0594,266521:0.05311)100:0.04977,(182569:0.10314,4463866:0.07165)70:0.01505)100:0.04024)80:0.01602)100:0.05088)20:0.02162)0:0.0112,((573196:0.11279,((((3825327:0.11767,4298210:0.09472)100:0.07495,(836195:0.11165,801940:0.09002)100:0.02232)90:0.0347,((1928988:0.1129,(1129716:0.13293,2909029:0.13959)50:0.01858)70:0.02572,(((815912:0.12176,((219508:0.13512,(426860:0.12643,(202758:0.04748,4344033:0.03692)100:0.11429)90:0.0487)20:0.00791,((823117:0.10669,823009:0.0888)90:0.0381,3770699:0.24911)50:0.02136)40:0.02309)30:0.01326,(4455990:0.05381,(1128285:0.06585,4271527:0.03794)70:0.02727)100:0.06911)10:0.01546,4097115:0.09311)30:0.02142)20:0.01039)20:0.02855,(729293:0.18117,3871866:0.11553)90:0.03599)100:0.15854)20:0.02836,150700:0.13922)20:0.02787)0:0.00717)0:0.00859)100;'
    )

    old_tree = Tree.get(schema='newick', data=tree67)
    tree_to_reroot = Tree.get(schema='newick', data=tree70)
    new_tree = Rerooter().reroot_by_tree(old_tree, tree_to_reroot)

    old_children = old_tree.seed_node.child_nodes()
    expected_lefts = old_children[0].leaf_nodes()
    expected_rights = old_children[1].leaf_nodes()

    # The old root's first child is checked against the new root's second
    # child, and vice versa.
    new_children = new_tree.seed_node.child_nodes()
    pairings = (
        (expected_lefts, new_children[1]),
        (expected_rights, new_children[0]),
    )
    for expected_tips, new_side in pairings:
        new_labels = [leaf.taxon.label for leaf in new_side.leaf_nodes()]
        for tip in expected_tips:
            self.assertTrue(tip.taxon.label in new_labels)

    self.assertEqual(len(tree_to_reroot.leaf_nodes()), len(new_tree.leaf_nodes()))
def summary_to_nw_str(mcmc_tree_filename):
    """Summarise an MCMC tree file and return the summary tree as Newick.

    ``calc_summary_tree`` is run first; the NEXUS file
    ``<mcmc_tree_filename>_summary.tree`` is then read with case-sensitive
    taxon labels and serialised to Newick without annotations.
    """
    calc_summary_tree(mcmc_tree_filename)

    # Convert the summary NEXUS tree to Newick for ete3.
    summary_path = mcmc_tree_filename + "_summary.tree"
    namespace = TaxonNamespace(is_case_sensitive=True)
    summary_tree = Tree.get(
        path=summary_path,
        schema="nexus",
        taxon_namespace=namespace,
        case_sensitive_taxon_labels=True,
        suppress_internal_node_taxa=False,
    )

    newick = summary_tree.as_string('newick', suppress_annotations=True)
    # Drop the first five characters (presumably the "[&R] " rooting
    # prefix -- TODO confirm) and any trailing newlines.
    return newick[5:].rstrip("\n")
def bipartition_by_root(self):
    """Split this tree in two at the root's first child.

    Returns ``(t1, t2, root)``: ``t1`` wraps this object's own tree after
    the first child's subtree has been pruned from it, ``t2`` wraps a new
    tree built from that first child, and ``root`` is the original seed
    node.  A single-leaf tree cannot be split and yields
    ``(None, None, None)``.
    """
    if self.n_leaves == 1:
        return (None, None, None)

    tree = self._tree
    root = tree.seed_node
    first_child = root._child_nodes[0]

    tree.prune_subtree(first_child, update_splits=True, delete_outdegree_one=True)
    remainder = PhylogeneticTree(tree)
    detached = PhylogeneticTree(Tree(first_child))

    # Reroot if there is more than one leaf left.
    if detached.n_leaves > 1:
        detached._tree.reroot_at_node(first_child)
    return remainder, detached, root
def getBalancedTreeByHeight(self, size, th):
    """Build a balanced tree string and return it as an unrooted tree.

    The number of levels is ``int(log2(size))`` and ``th`` is divided
    evenly between them.  ``self.taxCtr`` and ``self.bal_newick`` are
    reset, ``self.buildBalancedString`` fills in the Newick string, and the
    parsed tree is derooted before being returned.
    """
    levels = int(math.log(size, 2))
    step = (th * 1.0) / levels

    self.taxCtr = 0
    self.bal_newick = ''
    self.buildBalancedString(1, levels, step)
    self.bal_newick += ';'

    balanced = Tree.get_from_string(self.bal_newick, "newick")
    balanced.deroot()
    return balanced
def sample_with_outgroups(a_tree, n_ingroups, n_outgroups=1, n_reps=1):
    """Draw ``n_reps`` pruned samples from a large tree.

    Each replicate copies ``a_tree`` and passes the copy to
    ``sample_and_prune`` with the requested numbers of ingroup and outgroup
    taxa.  Returns ``(True, samples)`` where every sample is a
    ``(tree, ingroups, outgroups)`` tuple, or ``(False, samples_so_far)``
    as soon as one replicate reports failure.
    """
    samples = []
    for _ in range(n_reps):
        replicate = Tree(a_tree)
        ok, ingroups, outgroups = sample_and_prune(replicate, n_ingroups, n_outgroups=n_outgroups)
        if not ok:
            return False, samples
        samples.append((replicate, ingroups, outgroups))
    return True, samples
def get_dendropy_tree_from_break_tree(self, break_tree):
    """Assemble an unrooted DendroPy tree from the leaf sets of a break tree.

    Leaf sets are processed in increasing size order (sizes below the
    maximum reported by ``_sort_break_tree_leaves_sets``).  For each set,
    the current topmost ancestors of its leaves are collected and, when
    there is more than one, joined under a new node.  Finally all remaining
    topmost ancestors are joined under one seed node and the resulting tree
    is derooted.
    """
    nodes = self._extract_break_tree_leaves(break_tree)
    leaves_sets_by_size, max_size = self._sort_break_tree_leaves_sets(break_tree)

    for size in range(max_size):
        if size not in leaves_sets_by_size:
            continue
        for leave_set in leaves_sets_by_size[size]:
            tops = []
            for leaf in leave_set:
                node = self._find_node_with_same_taxon(nodes, leaf)
                top = self._get_oldest_parent(node)
                if top not in tops:
                    tops.append(top)
            joint_parent = Node()
            if len(tops) > 1:
                for top in tops:
                    top.parent_node = joint_parent

    remaining_tops = []
    for node in nodes:
        top = self._get_oldest_parent(node)
        if top not in remaining_tops:
            remaining_tops.append(top)

    if len(remaining_tops) == 1:
        seed = remaining_tops[0]
    else:
        # Zero or several subtrees: hang whatever there is under a new seed.
        seed = Node()
        for top in remaining_tops:
            top.parent_node = seed

    result = Tree(seed_node=seed)
    result.deroot()
    return result
def generate_ATT_from_phylesystem(aln, workdir, study_id, tree_id, phylesystem_loc='api'):
    """Build an AlignTreeTax object from an alignment and an OpenTree tree.

    Gathers together tree, alignment and study info, and renames the taxa
    to their otu ids.

    ``aln`` must be a DendroPy ``DnaCharacterMatrix``; ``study_id`` and
    ``tree_id`` identify the tree, fetched through ``get_nexson`` from
    ``phylesystem_loc``.  Spaces in alignment labels and in the Newick
    string are replaced by underscores.  Alignment taxa without a matching
    original label in the tree are reported on stderr; the code below only
    prints that message.
    """
    #TODO CHECK ARGS
    assert(isinstance(aln, datamodel.charmatrixmodel.DnaCharacterMatrix))
    for taxon in aln.taxon_namespace:
        # Forcing all spaces to underscore UGH
        taxon.label = taxon.label.replace(" ", "_")

    nexson = get_nexson(study_id, phylesystem_loc)
    ingroup_ott_ids = get_subtree_otus(nexson, tree_id=tree_id, subtree_id="ingroup", return_format="ottid")
    ott_mrca = get_mrca_ott(ingroup_ott_ids)

    schema = PhyloSchema('newick', output_nexml2json='1.2.1', content="tree", tip_label="ot:originalLabel")
    newick = extract_tree(nexson, tree_id, schema)
    # UGH Very heavy handed, need to make sure happens on alignment side as well.
    newick = newick.replace(" ", "_")
    # The tree shares the alignment's taxon namespace, so the relabelling
    # further down is what the serialised tree at the end is built from.
    tre = Tree.get(data=newick, schema="newick", preserve_underscores=True, taxon_namespace=aln.taxon_namespace)

    otu_dict = {}
    orig_lab_to_otu = {}
    treed_taxa = {}
    for otu_id in get_subtree_otus(nexson, tree_id=tree_id):
        info = extract_otu_nexson(nexson, otu_id)[otu_id]
        otu_dict[otu_id] = info
        info['^physcraper:status'] = "original"
        info['^physcraper:last_blasted'] = "1900/01/01"
        orig = info.get(u'^ot:originalLabel').replace(" ", "_")
        orig_lab_to_otu[orig] = otu_id
        treed_taxa[orig] = info.get(u'^ot:ottId')

    for taxon in aln.taxon_namespace:
        try:
            otu_id = orig_lab_to_otu[taxon.label]
        except KeyError:
            sys.stderr.write("{} doesn't have an otu id. It is being removed from the alignement. This may indicate a mismatch between tree and alignement\n".format(taxon.label))
        else:
            taxon.label = otu_id.encode('ascii')

    #need to prune tree to seqs and seqs to tree...
    otu_newick = tre.as_string(schema="newick")
    # newick should be bare, but alignment should be DnaCharacterMatrix
    return AlignTreeTax(otu_newick, otu_dict, aln, ingroup_mrca=ott_mrca, workdir=workdir)
def returnRootOfTree(infile, filePrefix, ext):
    '''Return the node string of the root of a Newick tree.

    The tree file is ``<directory of infile>/<filePrefix>.<ext>``.  The
    first internal node at level 0 is rendered with ``get_node_str()``; an
    empty string is returned when no internal node is at level 0.
    '''
    directory = os.path.dirname(os.path.realpath(infile))
    treePath = directory + '/' + filePrefix + '.' + ext
    myTree = Tree.get_from_path(
        treePath,
        'newick',
        annotations_as_nhx=True,
        extract_comment_metadata=True,
        suppress_annotations=False,
    )
    root_strings = (
        node.get_node_str()
        for node in myTree.internal_nodes()
        if node.level() == 0
    )
    return next(root_strings, '')
def test_input_unrooted_tree(self):
    """Create a package from an unrooted tree and count the tree's leaves.

    The package is built in a temporary directory from the
    ``61_otus.without_4459468`` inputs; the first line of the resulting
    reference-package tree file must parse to a tree with 21 leaves.
    """
    otu61 = os.path.join(path_to_data, '61_otus.gpkg', '61_otus.refpkg')
    # NOTE(review): the temporary .fa file is not used below; it is kept
    # so that the set-up stays exactly as before.
    with tempfile.NamedTemporaryFile(suffix='.fa') as bad_alignment:
        with tempdir.TempDir() as tmp:
            Create(prerequisites).main(
                taxtastic_taxonomy=os.path.join(otu61, '61_otus_taxonomy.csv'),
                taxtastic_seqinfo=os.path.join(otu61, '61_otus_seqinfo.csv'),
                # created with newick_utils:
                # nw_prune test/data/61_otus.gpkg/61_otus.refpkg/61_otus.tre 4459468 >test/data/61_otus.without_4459468.tre
                unrooted_tree=os.path.join(path_to_data, 'create', '61_otus.without_4459468.tre'),
                sequences=os.path.join(path_to_data, 'create', '61_otus.without_4459468.fasta'),
                alignment=os.path.join(path_to_data, 'create', '61_otus.without_4459468.aln.fasta'),
                prefix=tmp,
                force=True)
            gpkg = GraftMPackage.acquire(tmp)
            # Close the tree file deterministically instead of leaving the
            # handle to the garbage collector.
            with open(gpkg.reference_package_tree_path()) as tree_file:
                first_line = tree_file.readline()
            tree = Tree.get(schema='newick', data=first_line)
            self.assertEqual(21, len(tree.leaf_nodes()))
def read_matrix_and_tree(char_file_path, tree_file_path, char_type=DnaCharacterMatrix, char_schema='fasta', tree_schema='newick'):
    """Read an optional character matrix and a tree sharing its taxa.

    When ``char_file_path`` is truthy the matrix is loaded with
    ``char_type.get`` and its taxon namespace is frozen
    (``is_mutable = False``) before being handed to the tree reader.
    Otherwise no matrix is read and the tree is read with
    ``taxon_namespace=None``.

    Returns ``(matrix_or_None, tree)``.
    """
    matrix = None
    namespace = None
    if char_file_path:
        matrix = char_type.get(path=char_file_path, schema=char_schema)
        namespace = matrix.taxon_namespace
        namespace.is_mutable = False

    tree = Tree.get(
        path=tree_file_path,
        schema=tree_schema,
        preserve_underscores=True,
        taxon_namespace=namespace,
    )
    return matrix, tree