def __init__(self, **kwargs):
    '''
    Load the tree to be re-rooted and, optionally, a reference tree.

    Parameters
    ----------
    reference_tree_path: str
        Path to the newick file containing the reference tree, which is
        used to re-root the tree provided to tree_path. Optional; when it
        is omitted or falsy, self.reference_tree is set to the value that
        was passed (None by default) and no file is read.
    tree_path: str
        Path to the newick file containing the tree to be re-rooted. This
        tree will be re-rooted at the same position as the tree provided
        to reference_tree_path.

    Raises
    ------
    KeyError
        If tree_path is not supplied.
    Exception
        If any keyword argument other than the two above is supplied.
    '''
    reference_tree_path = kwargs.pop('reference_tree_path', None)
    tree_path = kwargs.pop('tree_path')

    # Reject unknown arguments up front so that a typo in a keyword does
    # not cost two tree parses before it is reported.
    if kwargs:
        raise Exception("Unexpected arguments provided to Decorator class: %s" % kwargs)

    logging.debug("Importing old tree from file: %s", tree_path)
    self.tree = Tree.get(path=tree_path, schema='newick')

    if reference_tree_path:
        logging.debug("Importing reference tree from file: %s", reference_tree_path)
        self.reference_tree = Tree.get(path=reference_tree_path, schema='newick')
    else:
        self.reference_tree = reference_tree_path
def scale_tree(tree, x):
    """Build two rescaled copies of ``tree``.

    ``tree`` is marked as rooted and has its bipartitions encoded as a side
    effect. A copy (``s_tree``) is parsed from its newick string in the same
    taxon namespace; every non-root edge of the copy whose bipartition
    matches an active, non-root node of ``tree`` is multiplied by
    ``x[node.idx]``. A second copy (``t_tree``) is parsed from the scaled
    tree and every non-root edge of it is divided by ``x[-2]``.

    Returns the tuple ``(s_tree, t_tree)``.
    """
    namespace = tree.taxon_namespace
    s_tree = Tree.get(data=tree.as_string("newick"),
                      taxon_namespace=namespace,
                      schema="newick",
                      rooting="force-rooted")
    tree.is_rooted = True
    tree.encode_bipartitions()
    s_tree.encode_bipartitions()
    mu = x[-2]

    # bipartition -> index into x, taken from the active non-root nodes
    idx_by_bipartition = {
        node.bipartition: node.idx
        for node in tree.postorder_node_iter()
        if node is not tree.seed_node and node.is_active
    }

    for node in s_tree.postorder_node_iter():
        if node is s_tree.seed_node or node.bipartition not in idx_by_bipartition:
            continue
        node.edge_length *= x[idx_by_bipartition[node.bipartition]]

    # t_tree is deliberately parsed without the shared taxon namespace
    t_tree = Tree.get(data=s_tree.as_string("newick"),
                      schema="newick",
                      rooting="force-rooted")
    for node in t_tree.postorder_node_iter():
        if node is not t_tree.seed_node:
            node.edge_length /= mu

    return s_tree, t_tree
def addSiblingsFromLabels(concat_tree_file, species_tree_file, split_string):
    """Expand the leaves of a species tree using matching leaves of a concatenated tree.

    Both files are read as newick with underscores preserved. For every leaf
    of the species tree, ``find_similar`` is asked for the matching leaves of
    the concatenated tree:

    * exactly one match: the species leaf is relabelled by appending
      ``split_string`` plus the second ``split_string``-separated field of
      the matching leaf's label;
    * more than one match: all matching nodes are attached to the species
      leaf's parent and the species leaf itself is removed;
    * no match: the leaf is left untouched.

    The resulting species tree is printed to stdout as newick; nothing is
    returned.
    """
    concat_tree = Tree.get(path=concat_tree_file, schema="newick", preserve_underscores=True)
    species_tree = Tree.get(path=species_tree_file, schema="newick", preserve_underscores=True)
    concat_tree_leaves = concat_tree.leaf_nodes()

    for leaf_node in species_tree.leaf_nodes():
        similar_nodes = find_similar(concat_tree_leaves, leaf_node.taxon.label)
        # Compare by value: `is 1` relies on int caching and is a SyntaxWarning on 3.8+.
        if len(similar_nodes) == 1:
            suffix = similar_nodes[0].taxon.label.split(split_string)[1]
            leaf_node.taxon.label = leaf_node.taxon.label + split_string + suffix
        elif len(similar_nodes) > 1:
            parent = leaf_node.parent_node
            for new_node in similar_nodes:
                parent.add_child(new_node)
            parent.remove_child(leaf_node)

    print(species_tree.as_string(schema="newick", suppress_internal_node_labels=False))
def test_joel_bug(self):
    '''Regression test: re-root the 70-leaf tree by the 67-OTU reference tree.

    Every tip under the first child of the reference root must end up under
    the second child of the re-rooted tree's root, and vice versa, and the
    re-rooted tree must keep the same number of leaves as its input.
    '''
    tree67 = u'''[ Thu Sep 10 15:55:28 2015: Loaded from /srv/projects/graftm/testing_files/testing_graftM/tmp_01_decorate/67_otus.tree Thu Sep 10 15:56:18 2015: tree_67_otus saved to /srv/projects/graftm/testing_files/testing_graftM/tmp_01_decorate/67_otus.rerooted.tree ] ((((1928988:0.10866,2909029:0.15809):0.03546,((801940:0.10703,(3825327:0.12686,4298210:0.09398):0.07480):0.02560,729293:0.21465):0.01982):0.02058,((426860:0.16275,219508:0.12556):0.02403,((1128285:0.06200,4455990:0.07954):0.07525,(815912:0.12348,(3770699:0.23707,823009:0.09955):0.04225):0.01489):0.01849):0.01531):0.09184,(((2361381:0.22741,(3779572:0.06720,4363260:0.07438):0.01460):0.04187,(((((((734152:0.13251,4091454:0.12251):0.03552,((576962:0.14097,(1145804:0.14124,3106714:0.14895):0.01964):0.01668,(2014493:0.15560,(3192744:0.11018,(202294:0.07263,1138804:0.08032):0.05015):0.01277):0.01187):0.01016):0.01486,4323734:0.15004):0.00053,(759363:0.05430,4459468:0.04835):0.03216):0.01531,4322265:0.12041):0.01024,(4391683:0.11058,(229854:0.07735,(4336814:0.09937,((150571:0.07911,2730777:0.10930):0.04404,((4042859:0.25381,(717487:0.13914,4363563:0.19585):0.02281):0.02587,(((3190878:0.16480,4452949:0.07312):0.05029,(4015030:0.10339,(4438491:0.04779,(2286116:0.08699,(4251079:0.03657,4349225:0.02256):0.01189):0.01091):0.04963):0.01748):0.02917,(3014179:0.16455,(2170497:0.16101,(2107103:0.22406,951205:0.11633):0.02436):0.02574):0.03041):0.01561):0.02862):0.02589):0.01914):0.01811):0.01347):0.01451,((182569:0.14758,4363259:0.07793):0.04894,696036:0.14901):0.01514):0.01624):0.02659,(3761685:0.11278,4423155:0.16503):0.03965):0.09184); '''
    tree70 = u'((4423550:0.17275,((4091454:0.108,4427993:0.1045)50:0.01575,((123662:0.06599,(3269889:0.12737,(104534:0.06041,734152:0.09136)20:0.00526)80:0.01669)90:0.01398,(300695:0.10755,225636:0.1317)100:0.0405)0:0.01073)40:0.0128)20:0.00782,(4377103:0.09243,((172946:0.08097,1145804:0.08645)100:0.02986,(1941303:0.0953,4332975:0.09505)90:0.00838)100:0.02206)90:0.0272,((((1931714:0.07012,(4322265:0.10071,4343117:0.13235)100:0.01842)100:0.03116,(((759363:0.05402,4459468:0.0433)100:0.02405,(294612:0.14484,2679839:0.1009)90:0.02132)70:0.01331,((((((730039:0.15444,((4015030:0.11176,(4438491:0.04568,(4349225:0.02406,(2286116:0.08501,(4251079:0.02026,4386156:0.01582)80:0.01016)40:0.0097)80:0.0168)100:0.03826)50:0.01397,(4308961:0.10766,4452949:0.05355)90:0.06215)40:0.01455)50:0.01325,(((1718272:0.12738,(150571:0.08502,(699249:0.03117,2730777:0.03253)100:0.06302)70:0.02174)60:0.03847,(((2107103:0.20025,3190878:0.14435)40:0.03601,(1824285:0.10892,3014179:0.14706)30:0.02039)0:0.01309,((3366304:0.09202,951205:0.07509)100:0.05732,2170497:0.16332)90:0.02722)10:0.01937)0:0.01868,(3064426:0.20791,((1837676:0.14477,(4363563:0.14803,4479774:0.10823)90:0.04638)90:0.03766,(4042859:0.2295,717487:0.15674)40:0.01749)20:0.01416)0:0.01063)0:0.03387)100:0.04795,4336814:0.08037)0:0.02958,(346735:0.11193,4391683:0.07639)60:0.00894)0:0.01312,1142178:0.07594)0:0.01881,(229854:0.0646,4460175:0.09289)90:0.02422)20:0.01731)0:0.01339)0:0.00777,(((2984017:0.05634,4340384:0.07722)80:0.03016,(((4371218:0.13005,(1133483:0.08797,3106714:0.09717)90:0.02053)80:0.02174,(3256066:0.08328,4022282:0.11841)90:0.03619)100:0.03392,((202294:0.06795,1138804:0.07777)100:0.05296,(3192744:0.09608,(2014493:0.11684,(180127:0.06532,4417185:0.0713)100:0.03824)100:0.0368)40:0.01663)70:0.00787)50:0.01733)10:0.0083,(222095:0.1391,(288404:0.13004,(4323734:0.07601,4446882:0.06844)60:0.01661)100:0.02863)40:0.01639)0:0.00846)0:0.0135,(((((1133369:0.07769,4336154:0.07979)100:0.11778,(((708774:0.0822,((114724:0.047,82092:0.04936)100:0.11526,(201206:0.10329,4423155:0.14181)60:0.03138)40:0.01886)80:0.03209,(202302:0.11673,3761685:0.09059)100:0.02325)90:0.02946,(((576962:0.11188,202459:0.09918)90:0.033,(213358:0.0989,(3390949:0.09853,3726184:0.09836)90:0.03298)90:0.02315)20:0.01425,202949:0.15903)0:0.01188)20:0.02709)10:0.01609,((4323100:0.0982,4409929:0.10612)60:0.01386,((696036:0.11283,(203529:0.18615,202449:0.08377)10:0.02209)30:0.02916,((2361381:0.18808,203220:0.10905)100:0.04166,(4363260:0.07208,(3779572:0.04977,114015:0.13268)70:0.02151)70:0.01055)100:0.04229)0:0.01717)0:0.01634)0:0.00519,(539547:0.12233,(4409453:0.14784,(4363259:0.05689,((268769:0.0594,266521:0.05311)100:0.04977,(182569:0.10314,4463866:0.07165)70:0.01505)100:0.04024)80:0.01602)100:0.05088)20:0.02162)0:0.0112,((573196:0.11279,((((3825327:0.11767,4298210:0.09472)100:0.07495,(836195:0.11165,801940:0.09002)100:0.02232)90:0.0347,((1928988:0.1129,(1129716:0.13293,2909029:0.13959)50:0.01858)70:0.02572,(((815912:0.12176,((219508:0.13512,(426860:0.12643,(202758:0.04748,4344033:0.03692)100:0.11429)90:0.0487)20:0.00791,((823117:0.10669,823009:0.0888)90:0.0381,3770699:0.24911)50:0.02136)40:0.02309)30:0.01326,(4455990:0.05381,(1128285:0.06585,4271527:0.03794)70:0.02727)100:0.06911)10:0.01546,4097115:0.09311)30:0.02142)20:0.01039)20:0.02855,(729293:0.18117,3871866:0.11553)90:0.03599)100:0.15854)20:0.02836,150700:0.13922)20:0.02787)0:0.00717)0:0.00859)100;'

    old_tree = Tree.get(schema='newick', data=tree67)
    tree_to_reroot = Tree.get(schema='newick', data=tree70)
    new_tree = Rerooter().reroot_by_tree(old_tree, tree_to_reroot)

    expected_lefts = old_tree.seed_node.child_nodes()[0].leaf_nodes()
    expected_rights = old_tree.seed_node.child_nodes()[1].leaf_nodes()

    # Collect the observed labels once instead of rebuilding the list for
    # every tip being checked.
    new_root_children = new_tree.seed_node.child_nodes()
    observed_first = set(t.taxon.label for t in new_root_children[0].leaf_nodes())
    observed_second = set(t.taxon.label for t in new_root_children[1].leaf_nodes())

    for tip in expected_lefts:
        self.assertIn(tip.taxon.label, observed_second)
    for tip in expected_rights:
        self.assertIn(tip.taxon.label, observed_first)
    self.assertEqual(len(tree_to_reroot.leaf_nodes()),
                     len(new_tree.leaf_nodes()))
def test_ben_bug(self):
    '''Regression test: re-root one tree by another after both were passed through reroot().

    Every tip under the first child of the old tree's root must end up under
    the second child of the re-rooted tree's root, and vice versa, and the
    re-rooted tree must keep the same number of leaves as its input.
    '''
    new_tree_newick = u'(646366661:0.00571,(646777089:0.01427,(2556226606:0.0,2517129521:0.0):0.04312)0.377:0.01170,((650856936:0.01153,(((((646367708:0.01465,(638201361:0.00187,646622935:0.00573)0.940:0.01352)0.988:0.02634,(2519841469:0.06952,(650856136:0.01840,2506713669:0.02486)0.774:0.00888)0.893:0.01193)0.981:0.03778,((649738338:0.07504,(638155665:0.00613,648151945:0.00304)0.995:0.05973)0.884:0.02836,((650752390:0.11644,(2516847065:0.01707,2520801411:0.03442)0.993:0.04619)0.940:0.03278,(640592705:0.14347,637846211:0.11851)0.971:0.04593)0.940:0.03067)0.943:0.03401)0.998:0.06483,(638168675:0.17080,((649738388:0.09935,((((2540854716:0.00325,2553937573:0.00406)1.000:0.09930,(646533023:0.09868,640592823:0.06908)0.951:0.03770)1.000:0.07636,(650872422:0.05527,(650750471:0.05106,(2516847513:0.01440,2520803234:0.02517)0.998:0.05067)0.947:0.03589)0.074:0.01784)0.786:0.03804,(638155700:0.00445,648151981:0.00284)0.894:0.02131)0.995:0.08448)0.999:0.10690,((KYC55281.1:0.28954,(2540666849:0.26647,(2555938320:0.04589,2518907621:0.04631)0.970:0.05624)1.000:0.12340)0.993:0.09723,(((2515321874:0.26529,((637699780:0.01317,(2540563143:0.01361,(638165755:0.01099,638179449:0.01674)0.558:0.00611)0.964:0.01965)1.000:0.10518,(2502870849:0.06989,(648055573:0.14431,(646706666:0.11338,(637960147:0.03570,(2509663319:0.04930,(2519472088:0.03452,2515107634:0.07709)0.639:0.02134)0.957:0.03004)0.316:0.01755)0.809:0.02055)0.323:0.01213)0.991:0.07286)0.974:0.06071)0.997:0.09000,(650797088:0.06590,(639699575:0.03533,2512008957:0.12951)0.779:0.03900)1.000:0.21062)0.685:0.04985,((((640867801:0.08102,(2507462304:0.07476,(643570914:0.08474,((637897753:0.11959,((2509037835:0.14386,(648194984:0.08665,(648195418:0.04239,2506476786:0.04237)0.993:0.03477)0.668:0.01876)0.510:0.00983,((((640115295:0.06428,2540643958:0.01655)1.000:0.05043,2540643737:0.02645)0.502:0.01269,640115052:0.02482)0.987:0.04272,(2507147269:0.04181,2507146024:0.06962)0.615:0.01449)0.611:0.01456)0.992:0.03807)0.542:0.02193,((2525334810:0.02116,640099739:0.01544)0.785:0.00549,(640100248:0.00446,2525335778:0.02444)0.750:0.00227)1.000:0.12583)0.944:0.03489)0.962:0.04171)0.986:0.04499)0.793:0.02738,2509039570:0.05560)1.000:0.18840,(2505968448:0.05750,(2505971857:0.03133,2512783668:0.02305)0.185:0.01848)0.998:0.08344)0.875:0.04885,2518787893:0.16350)0.868:0.04436)1.000:0.14016)0.957:0.05998)0.998:0.08120)0.984:0.05479)0.999:0.07664,(2506713165:0.01408,((650917784:0.03595,640788680:0.07226)0.510:0.02178,(2519842728:0.03972,(646859549:0.04217,(2511672461:0.01672,(640786544:0.03901,(640793336:0.00334,(640165512:0.02037,641283602:0.00189)0.147:0.00210)0.175:0.00497)0.641:0.01093)0.991:0.02914)1.000:0.05940)0.323:0.02509)0.977:0.02843)0.960:0.02127)0.499:0.01743)0.998:0.03986,(638202197:0.00190,(644970377:0.01516,646623830:0.00752)0.678:0.00364)0.903:0.00939)0.626:0.01605);'
    old_tree_newick = u'(((((646366661:0.00564,((2517129521:0,2556226606:0):0.04302,646777089:0.01412)0.499:0.01173)0.999:0.07494,(638202197:0.0019,(644970377:0.01507,646623830:0.0075)0.738:0.00362)0.872:0.00931)0.635:0.01598,650856936:0.01308)0.995:0.04,(2506713165:0.01171,((640788680:0.07255,650917784:0.03571)0.466:0.02189,(2519842728:0.03945,(646859549:0.04217,(2511672461:0.01668,(640786544:0.03894,(640793336:0.00335,(640165512:0.02038,641283602:0.0019)0.155:0.00211)0.174:0.00496)0.668:0.01095)0.987:0.02908)1.000:0.05973)0.285:0.02499)0.985:0.02844)0.967:0.02183)0.668:0.01914,(((646367708:0.01473,(638201361:0.00188,646622935:0.00578)0.947:0.01381)0.981:0.02381,(2519841469:0.06809,(650856136:0.0182,2506713669:0.02508)0.727:0.0089)0.915:0.01182)0.971:0.03729,((649738338:0.07412,(648151945:0.00339,638155665:0.00581)0.999:0.05834)0.847:0.02714,((650752390:0.11368,(2516847065:0.01707,2520801411:0.03449)0.997:0.04731)0.943:0.033,(640592705:0.14071,637846211:0.11886)0.974:0.04714)0.907:0.02899)0.938:0.03341)0.999:0.06584,(638168675:0.16751,((649738388:0.09339,((((2540854716:0.00327,2553937573:0.00411)1.000:0.1001,(640592823:0.06966,646533023:0.10017)0.945:0.03786)0.999:0.07732,(650872422:0.0565,(650750471:0.05131,(2516847513:0.01447,2520803234:0.02528)0.998:0.05086)0.932:0.03598)0.014:0.01864)0.823:0.03947,(648151981:0.00285,638155700:0.0045)0.891:0.01979)0.998:0.08667)0.999:0.11839,((2540666849:0.25749,(2518907621:0.04615,2555938320:0.04332)0.979:0.0624)1.000:0.20869,(((2515321874:0.27886,((637699780:0.01253,(2540563143:0.01408,(638165755:0.01119,638179449:0.01933)0.559:0.0062)0.973:0.01906)1.000:0.10181,(((2509663319:0.05476,(2515107634:0.07727,2519472088:0.03736)0.549:0.02018)0.970:0.02691,(637960147:0.03454,646706666:0.12022)0.328:0.01393)0.935:0.02456,(2502870849:0.07124,648055573:0.13944)0.419:0.01578)0.997:0.07265)0.976:0.05902)0.992:0.09155,(650797088:0.06773,(639699575:0.03844,2512008957:0.12921)0.700:0.03684)1.000:0.19382)0.774:0.0559,((2518787893:0.1617,(2512783668:0.01562,(2505971857:0.02991,2505968448:0.06931)0.820:0.01544)1.000:0.10132)0.000:0.03917,((640867801:0.07687,(2507462304:0.07769,(((637897753:0.11989,((2509037835:0.14075,(648194984:0.08661,(648195418:0.04254,2506476786:0.04232)0.986:0.0348)0.722:0.0191)0.553:0.00998,((((640115295:0.06424,2540643958:0.01643)1.000:0.05042,2540643737:0.02653)0.542:0.01245,640115052:0.0251)0.986:0.04265,(2507146024:0.06963,2507147269:0.0417)0.641:0.01435)0.611:0.01449)0.989:0.03824)0.424:0.02187,((640099739:0.01547,2525334810:0.02122)0.833:0.00545,(640100248:0.00445,2525335778:0.02442)0.761:0.0023)1.000:0.12528)0.944:0.03961,643570914:0.0938)0.959:0.03885)0.984:0.04627)0.758:0.02972,2509039570:0.05614)1.000:0.19872)0.185:0.04422)0.999:0.13795)0.646:0.06267)0.977:0.07517)0.979:0.05715);'

    old_tree = Tree.get(schema='newick', data=old_tree_newick)
    tree_to_reroot = Tree.get(schema='newick', data=new_tree_newick)
    r = Rerooter()
    new_tree = r.reroot_by_tree(r.reroot(old_tree), r.reroot(tree_to_reroot))

    expected_lefts = old_tree.seed_node.child_nodes()[0].leaf_nodes()
    expected_rights = old_tree.seed_node.child_nodes()[1].leaf_nodes()

    # Collect the observed labels once instead of rebuilding the list for
    # every tip being checked.
    new_root_children = new_tree.seed_node.child_nodes()
    observed_first = set(t.taxon.label for t in new_root_children[0].leaf_nodes())
    observed_second = set(t.taxon.label for t in new_root_children[1].leaf_nodes())

    for tip in expected_lefts:
        self.assertIn(tip.taxon.label, observed_second)
    for tip in expected_rights:
        self.assertIn(tip.taxon.label, observed_first)
    self.assertEqual(len(tree_to_reroot.leaf_nodes()),
                     len(new_tree.leaf_nodes()))
def run_tree_regression(arg, taxa):
    """Load the tree at ``arg.tree`` and return ``regression(tree)``.

    The file is read as nexus when its first line starts with ``#NEXUS``
    (case-insensitive) and as newick otherwise. The taxon namespace is built
    from the ``'id'`` of each entry in ``taxa['taxa']``; tip dates are taken
    from each entry's ``['attributes']['date']``. Polytomies are resolved and
    ``setup_indexes`` / ``initialize_dates_from_taxa`` are applied before the
    regression is run.
    """
    taxon_namespace = TaxonNamespace([taxon['id'] for taxon in taxa['taxa']])

    # readline() returns '' on an empty file, whereas next(fp) would raise
    # StopIteration before the parser gets a chance to report the problem.
    with open(arg.tree) as fp:
        first_line = fp.readline()
    tree_format = 'nexus' if first_line.upper().startswith('#NEXUS') else 'newick'

    tree = Tree.get(
        path=arg.tree,
        schema=tree_format,
        tree_offset=0,
        preserve_underscores=True,
        taxon_namespace=taxon_namespace,
    )

    tree.resolve_polytomies(update_bipartitions=True)
    setup_indexes(tree, False)

    taxa2 = [{'date': taxon['attributes']['date']} for taxon in taxa['taxa']]
    initialize_dates_from_taxa(tree, taxa2)

    return regression(tree)
def generate_ATT_from_files(seqaln, mattype, workdir, treefile, otu_json, ingroup_mrca=None):
    """Build an ATT object without phylesystem.

    If no ingroup mrca ott_id is provided, will use all taxa in tree to calc mrca.

    Parameters
    ----------
    seqaln : path to the alignment file, read as a DnaCharacterMatrix.
    mattype : schema of the alignment file.
    workdir : passed through to AlignTreeTax.
    treefile : path to a newick tree; it is read into the alignment's
        taxon namespace.
    otu_json : path to a JSON file holding the otu dictionary; every
        alignment label must be a key of it.
    ingroup_mrca : optional value convertible to int; when falsy the MRCA is
        computed by get_mrca_ott from the '^ot:ottId' entries of the otu
        dictionary.

    Returns
    -------
    The AlignTreeTax instance built from the tree's newick string, the otu
    dictionary and the alignment.
    """
    aln = DnaCharacterMatrix.get(path=seqaln, schema=mattype)
    for tax in aln.taxon_namespace:
        tax.label = tax.label.replace(" ", "_")  # Forcing all spaces to underscore UGH

    # Parse the tree once, after the labels were normalised, so that it shares
    # the alignment's taxon namespace.
    tre = Tree.get(path=treefile,
                   schema="newick",
                   preserve_underscores=True,
                   taxon_namespace=aln.taxon_namespace)

    with open(otu_json) as data_file:
        otu_dict = json.load(data_file)
    for tax in aln:
        assert tax.label in otu_dict, "alignment label '%s' is missing from otu_json" % tax.label

    otu_newick = tre.as_string(schema="newick")

    if ingroup_mrca:
        ott_mrca = int(ingroup_mrca)
    else:
        # dict.get must be *called*; subscripting the bound method raises TypeError
        ott_ids = [otu.get('^ot:ottId') for otu in otu_dict.values()]
        ott_mrca = get_mrca_ott(ott_ids)

    return AlignTreeTax(otu_newick, otu_dict, aln, ingroup_mrca=ott_mrca, workdir=workdir)
def test_hello_world(self):
    '''Re-root a five-leaf tree by a four-leaf reference and compare with the expected newick.'''
    rerooter = Rerooter()
    reference_tree = Tree.get(schema='newick',
                              data=u'((A,B):1,(C,D):2);',
                              rooting="force-rooted")
    tree_to_reroot = Tree.get(schema='newick',
                              data=u'((A,B):1,(C,(D,E):2):3);',
                              rooting="force-rooted")
    rerooted = rerooter.reroot_by_tree(reference_tree, tree_to_reroot)
    self.assert_tree_equal_no_labels(u'((C,(D,E):2.0),(A,B):4);', rerooted)
def refRFDistance(t1NexFilePath, t2NexFilePath):
    """Return ``custom_distance`` between the trees stored in two nexus files.

    Both trees are read into one shared TaxonNamespace with
    ``unconstrained_taxa_accumulation_mode=True``.
    """
    shared_namespace = TaxonNamespace()
    first_tree, second_tree = [
        Tree.get(unconstrained_taxa_accumulation_mode=True,
                 path=nexus_path,
                 schema="nexus",
                 taxon_namespace=shared_namespace)
        for nexus_path in (t1NexFilePath, t2NexFilePath)
    ]
    return custom_distance(first_tree, second_tree)
def get_tree_from_synth(ott_ids, label_format="name", citation="cites.txt"):
    """Fetch the pruned synthetic tree for ``ott_ids`` and write its citations to a file.

    ``label_format`` must be one of 'id', 'name' or 'name_and_id'. For every
    entry of the response's 'supporting_studies', the part before '@' is
    looked up through ``oti.find_studies`` and the first hit's publication
    reference and publication are appended to the citation text, which is
    written to the ``citation`` path. Progress is reported on stdout.

    Returns the tree parsed from the response's newick string, with internal
    node taxa suppressed and unifurcations removed.
    """
    assert label_format in ['id', 'name', 'name_and_id']
    resp = treemachine.get_synth_tree_pruned(ott_ids=ott_ids, label_format=label_format)

    sys.stdout.write("gathering citations")
    citation_chunks = []
    for supporting in resp['supporting_studies']:
        sys.stdout.write('.')
        study_id = supporting.split('@')[0]
        hits = oti.find_studies(query_dict={"ot:studyId": study_id}, verbose=True)
        best_hit = hits[0]
        citation_chunks.append(
            '\n'
            + to_string(best_hit['ot:studyPublicationReference'])
            + best_hit['ot:studyPublication']
        )

    with open(citation, 'w') as citfile:
        citfile.write(''.join(citation_chunks))
    sys.stdout.write("citations printed to {}\n".format(citation))

    synth_tree = Tree.get(data=resp['newick'],
                          schema="newick",
                          suppress_internal_node_taxa=True)
    synth_tree.suppress_unifurcations()
    return synth_tree
def __init__(self, tree, otu_dict, alignment, ingroup_mrca, workdir, config_obj, schema='newick', taxon_namespace=None):
    """Tie together an alignment, a tree and an otu dictionary.

    Parameters:
        tree: tree source text, parsed with ``Tree.get(data=...)``.
        otu_dict: dict of otu information; must be a ``dict``.
        alignment: must be a ``DnaCharacterMatrix``; its taxon namespace is
            reused for the tree.
        ingroup_mrca: value that must convert to a non-zero int.
        workdir: working directory; it is made absolute and created if it
            does not exist.
        config_obj: stored as ``self.config``.
        schema: schema used to parse ``tree`` (default 'newick').
        taxon_namespace: not read anywhere in this method.

    Raises:
        AssertionError: if one of the type/identity checks fails.
        ValueError / TypeError: from ``int(ingroup_mrca)`` when the value
            cannot be converted at all.
    """
    debug("build ATT class")
    self.aln = alignment
    assert isinstance(self.aln, datamodel.charmatrixmodel.DnaCharacterMatrix), \
        ("your aln '%s' is not a DnaCharacterMatrix" % alignment)
    # The tree is parsed into the alignment's namespace so both share taxon objects.
    self.tre = Tree.get(data=tree, schema=schema, preserve_underscores=True, taxon_namespace=self.aln.taxon_namespace)
    assert (self.tre.taxon_namespace is self.aln.taxon_namespace), "tre and aln taxon_namespace are not identical"
    assert isinstance(otu_dict, dict), ("otu_dict '%s' is not of type dict" % otu_dict)
    self.otu_dict = otu_dict
    self.config = config_obj
    self.ps_otu = 1  # iterator for new otu IDs
    # NOTE(review): both reconcile steps run before workdir, mrca_ott and the
    # other attributes below exist — presumably they only need aln, tre and
    # otu_dict; confirm against their definitions.
    self._reconcile()
    self._reconcile_names()
    self.workdir = os.path.abspath(workdir)
    if not os.path.exists(self.workdir):
        os.makedirs(self.workdir)
    # NOTE(review): this assert also fails for ingroup_mrca == 0, because int(0) is falsy.
    assert int(ingroup_mrca), ("your ingroup_mrca '%s' is not an integer." % ingroup_mrca)
    self.mrca_ott = ingroup_mrca  # ott_ingroup mrca can be pulled directly from phylesystem
    self.orig_seqlen = []  # will get filled in later...
    self.gb_dict = {}  # has all info about new blast seq
    # NOTE(review): set to False *after* _reconcile() has already run; if
    # _reconcile() sets this flag it is overwritten here — verify intent.
    self._reconciled = False
    self.unpubl_otu_json = None
def test_bootstraps_in_annotated_tree_alongside_empty_taxa(self):
    '''Taxonomy is extracted from a tree whose internal labels mix bootstrap values and taxon names.

    Leaves under the node labelled '0.2:tax' get ['tax']; the other leaves
    get an empty taxonomy.
    '''
    # assertEqual, not assertEquals: the alias is deprecated and gone in Python 3.12
    self.assertEqual(
        {u'a': [], u'b': [], u'c': ['tax'], u'd': ['tax']},
        TaxonomyExtractor().taxonomy_from_annotated_tree(
            Tree.get(data="(a,(b,(c,d:0.2)'0.2:tax')0.01973:0.9)root;", schema='newick')))
def test_relabel(self):
    """Check labelled_induced_synth on the OTT ids found at the tips of study ot_809, tree1.

    Tip labels that cannot be read as integers are ignored. The induced tree
    labelled by name must have 6624 labelled tips, and the tree labelled by
    name and id must contain a specific MRCA node label.
    """
    ## JUST PUT A LIST OF IDS TO SIMPLIFY
    jetz = OT.get_tree(study_id='ot_809',
                       tree_id='tree1',
                       tree_format="newick",
                       label_format="ot:ottId")
    jetz_tree = Tree.get(string=jetz.response_dict['content'].decode(),
                         schema='newick',
                         suppress_internal_node_taxa=True,
                         suppress_leaf_node_taxa=True)
    tips = [tip.label for tip in jetz_tree.leaf_node_iter()]

    ott_ids = set()
    for tip in tips:
        try:
            ott_ids.add(int(tip))
        except (TypeError, ValueError):
            # Missing or non-numeric label: not an OTT id. Anything else
            # (KeyboardInterrupt, real bugs) must not be swallowed here.
            continue

    ret = taxonomy_helpers.labelled_induced_synth(ott_ids=list(ott_ids),
                                                  label_format='name')
    tips = [tip.taxon.label
            for tip in ret['labelled_tree'].leaf_node_iter()
            if tip.taxon]
    assert len(tips) == 6624

    ret = taxonomy_helpers.labelled_induced_synth(ott_ids=list(ott_ids),
                                                  label_format='name_and_id')
    nodes = [node.taxon.label for node in ret['labelled_tree'] if node.taxon]
    assert 'MRCA of taxa in Amazona auropalliata_ott1118 Amazona oratrix_ott1119' in nodes, nodes
def main(args):
    """Read ``args.input`` as newick, prune clock outliers and write the result to ``args.output``.

    ``args.iqd`` and ``args.clock_rate`` are forwarded to
    ``prune_clock_outliers``; underscores in labels are preserved on read.
    """
    input_tree = Tree.get(path=args.input,
                          schema="newick",
                          preserve_underscores=True)
    pruned_tree = prune_clock_outliers(input_tree,
                                       iqd=args.iqd,
                                       clock_rate=args.clock_rate)
    pruned_tree.write(path=args.output, schema="newick")
def test_input_unrooted_tree(self):
    '''Create a package from an unrooted tree and check the packaged reference tree has 21 leaves.'''
    otu61 = os.path.join(path_to_data, '61_otus.gpkg', '61_otus.refpkg')
    with tempdir.TempDir() as tmp:
        Create(prerequisites).main(
            taxtastic_taxonomy=os.path.join(otu61, '61_otus_taxonomy.csv'),
            taxtastic_seqinfo=os.path.join(otu61, '61_otus_seqinfo.csv'),
            # created with newick_utils:
            # nw_prune test/data/61_otus.gpkg/61_otus.refpkg/61_otus.tre 4459468 >test/data/61_otus.without_4459468.tre
            unrooted_tree=os.path.join(path_to_data, 'create', '61_otus.without_4459468.tre'),
            sequences=os.path.join(path_to_data, 'create', '61_otus.without_4459468.fasta'),
            alignment=os.path.join(path_to_data, 'create', '61_otus.without_4459468.aln.fasta'),
            prefix=tmp,
            force=True)
        gpkg = GraftMPackage.acquire(tmp)
        # Close the tree file deterministically instead of leaking the handle.
        with open(gpkg.reference_package_tree_path()) as tree_file:
            first_line = tree_file.readline()
        tree = Tree.get(schema='newick', data=first_line)
        self.assertEqual(21, len(tree.leaf_nodes()))
def ete_to_dendropy(tree):
    """Convert ``tree`` into a DendroPy tree plus its character matrix.

    The character matrix comes from ``ete_to_dendropy_cm(tree)``; the
    DendroPy tree is parsed from ``tree.write(format=1)`` into the matrix's
    taxon namespace.

    Returns the tuple ``(dendro_tree, char_matrix)``.
    """
    from dendropy import Tree as DTree

    char_matrix = ete_to_dendropy_cm(tree)
    shared_namespace = char_matrix.taxon_namespace
    newick_text = tree.write(format=1)
    dendro_tree = DTree.get(data=newick_text,
                            schema='newick',
                            taxon_namespace=shared_namespace)
    return dendro_tree, char_matrix
def test_yule(script_runner, execution_number, datadir):
    """Run tact_add_taxa with --yule on the stem2 data set, then tact_check_results on its output.

    Both commands must exit with status 0. The produced tree is dumped to
    stderr as an ASCII plot. Returns ``(tacted, taxed, bbone)``.
    """
    taxonomy = os.path.join(datadir, "stem2.taxonomy.tre")
    backbone = os.path.join(datadir, "stem2.backbone.tre")
    taxed = Tree.get(path=taxonomy, schema="newick")
    bbone = Tree.get(path=backbone, schema="newick", rooting="default-rooted")

    add_taxa_cmd = [
        "tact_add_taxa",
        "--taxonomy", taxonomy,
        "--backbone", backbone,
        "--output", ".tact-pytest-yule",
        "-vv",
        "--yule",
    ]
    result = script_runner.run(*add_taxa_cmd)
    assert result.returncode == 0

    output = ".tact-pytest-yule.newick.tre"
    tacted = Tree.get(path=output, schema="newick", rooting="default-rooted")
    sys.stderr.write(tacted.as_ascii_plot())

    check_cmd = [
        "tact_check_results", output,
        "--taxonomy", taxonomy,
        "--backbone", backbone,
        "--output", ".tact-pytest-yule.check.csv",
        "--cores=1",
    ]
    result = script_runner.run(*check_cmd)
    assert result.returncode == 0

    return (tacted, taxed, bbone)
def remove_branch_lengths(f, out):
    """Copy the newick tree in file ``f`` to file ``out`` with every edge length cleared.

    Parameters:
        f: path of the newick file to read.
        out: path of the newick file to write (created or truncated).
    """
    with open(f, 'r') as source:
        t = Tree.get(file=source, schema="newick")

    for e in t.edges():
        e.length = None

    # The context manager guarantees the output is flushed and closed.
    with open(out, 'w') as destination:
        t.write(file=destination, schema="newick")
def test_branch_lengths(self):
    '''https://github.com/geronimp/graftM/issues/192

    The taxonomy extracted for one sequence of the sulfite reductase tree
    must keep a clade name that itself starts with a number.
    '''
    taxes = TaxonomyExtractor().taxonomy_from_annotated_tree(
        Tree.get(path=os.path.join(path_to_data, 'create', 'sulfitereductase.ben.tree'),
                 schema='newick'))
    # assertEqual, not assertEquals: the alias is deprecated and gone in Python 3.12
    self.assertEqual(
        [u'Aanerobic sulfite reductase asrC',
         u'Anaerobic sulfite reductase asrC Group 3',
         u'Unknown alpha and beta subunits',
         u'0.856_PFAM_NIR_SIR,NIR_SIR_ferr'],  # number is actually in the clade name
        taxes['T506DRAFT_scaffold00010.10_60~2561511230'])
def scale_tree(f_name, n):
    """Multiply every defined edge length of the newick tree in ``f_name`` by ``n``.

    The result is written next to the input: every '.mt' in ``f_name`` is
    removed, then '_' + str(n) with '.' replaced by '_' and the '.mt'
    extension are appended (e.g. 'a.mt' and 0.5 give 'a_0_5.mt'). Edges
    without a length are left untouched.
    """
    with open(f_name, 'r') as source:
        t = Tree.get(file=source, schema="newick", tree_offset=0)

    for e in t.edges():
        if e.length is not None:
            e.length = float(n * float(e.length))

    out_name = f_name.replace('.mt', '') + '_' + str(n).replace('.', '_') + '.mt'
    # The context manager guarantees the output is flushed and closed.
    with open(out_name, 'w') as destination:
        t.write(file=destination, schema="newick")
def test_remove_sequences_with_named_internal_nodes(self):
    '''Removing two of the three leaves under a named internal node gives the expected tree string.'''
    cleaner = DendropyTreeCleaner()
    newick = "('Asulf_Archaeoglobus.1_2280~2522125074':7.17,(('Afulgi_764~2528311132':0.0,'CP006577_764~2588253768':0.0):0.0,'AE000782_746~638154502':0.0)'s__Archaeoglobus fulgidus':7.555):1.461;\n"
    tree = Tree.get(data=newick, schema='newick')

    to_remove = ['CP006577_764~2588253768', 'Afulgi_764~2528311132']
    cleaner.remove_sequences(tree, to_remove)

    expected = "(Asulf_Archaeoglobus.1_2280~2522125074:7.17,AE000782_746~638154502:7.555):1.461"
    self.assertEqual(expected, str(tree))
def assert_tree_equal_no_labels_deprecated(self, expected_newick, observed_tree):
    '''Assert that two trees have the same string form once internal node labels are cleared.

    ``expected_newick`` is parsed as newick; ``observed_tree`` is modified in
    place (the labels of its non-leaf nodes are set to None).
    '''
    expected = Tree.get(schema='newick', data=expected_newick)
    for tree in (expected, observed_tree):
        for node in tree.nodes():
            if node.is_leaf():
                continue
            node.label = None
    self.assertEqual(str(expected), str(observed_tree))
def test_ben_bug(self):
    '''Regression test: re-root one tree by another after both were passed through reroot().

    Every tip under the first child of the old tree's root must end up under
    the second child of the re-rooted tree's root, and vice versa, and the
    re-rooted tree must keep the same number of leaves as its input.
    '''
    new_tree_newick = u'(646366661:0.00571,(646777089:0.01427,(2556226606:0.0,2517129521:0.0):0.04312)0.377:0.01170,((650856936:0.01153,(((((646367708:0.01465,(638201361:0.00187,646622935:0.00573)0.940:0.01352)0.988:0.02634,(2519841469:0.06952,(650856136:0.01840,2506713669:0.02486)0.774:0.00888)0.893:0.01193)0.981:0.03778,((649738338:0.07504,(638155665:0.00613,648151945:0.00304)0.995:0.05973)0.884:0.02836,((650752390:0.11644,(2516847065:0.01707,2520801411:0.03442)0.993:0.04619)0.940:0.03278,(640592705:0.14347,637846211:0.11851)0.971:0.04593)0.940:0.03067)0.943:0.03401)0.998:0.06483,(638168675:0.17080,((649738388:0.09935,((((2540854716:0.00325,2553937573:0.00406)1.000:0.09930,(646533023:0.09868,640592823:0.06908)0.951:0.03770)1.000:0.07636,(650872422:0.05527,(650750471:0.05106,(2516847513:0.01440,2520803234:0.02517)0.998:0.05067)0.947:0.03589)0.074:0.01784)0.786:0.03804,(638155700:0.00445,648151981:0.00284)0.894:0.02131)0.995:0.08448)0.999:0.10690,((KYC55281.1:0.28954,(2540666849:0.26647,(2555938320:0.04589,2518907621:0.04631)0.970:0.05624)1.000:0.12340)0.993:0.09723,(((2515321874:0.26529,((637699780:0.01317,(2540563143:0.01361,(638165755:0.01099,638179449:0.01674)0.558:0.00611)0.964:0.01965)1.000:0.10518,(2502870849:0.06989,(648055573:0.14431,(646706666:0.11338,(637960147:0.03570,(2509663319:0.04930,(2519472088:0.03452,2515107634:0.07709)0.639:0.02134)0.957:0.03004)0.316:0.01755)0.809:0.02055)0.323:0.01213)0.991:0.07286)0.974:0.06071)0.997:0.09000,(650797088:0.06590,(639699575:0.03533,2512008957:0.12951)0.779:0.03900)1.000:0.21062)0.685:0.04985,((((640867801:0.08102,(2507462304:0.07476,(643570914:0.08474,((637897753:0.11959,((2509037835:0.14386,(648194984:0.08665,(648195418:0.04239,2506476786:0.04237)0.993:0.03477)0.668:0.01876)0.510:0.00983,((((640115295:0.06428,2540643958:0.01655)1.000:0.05043,2540643737:0.02645)0.502:0.01269,640115052:0.02482)0.987:0.04272,(2507147269:0.04181,2507146024:0.06962)0.615:0.01449)0.611:0.01456)0.992:0.03807)0.542:0.02193,((2525334810:0.02116,640099739:0.01544)0.785:0.00549,(640100248:0.00446,2525335778:0.02444)0.750:0.00227)1.000:0.12583)0.944:0.03489)0.962:0.04171)0.986:0.04499)0.793:0.02738,2509039570:0.05560)1.000:0.18840,(2505968448:0.05750,(2505971857:0.03133,2512783668:0.02305)0.185:0.01848)0.998:0.08344)0.875:0.04885,2518787893:0.16350)0.868:0.04436)1.000:0.14016)0.957:0.05998)0.998:0.08120)0.984:0.05479)0.999:0.07664,(2506713165:0.01408,((650917784:0.03595,640788680:0.07226)0.510:0.02178,(2519842728:0.03972,(646859549:0.04217,(2511672461:0.01672,(640786544:0.03901,(640793336:0.00334,(640165512:0.02037,641283602:0.00189)0.147:0.00210)0.175:0.00497)0.641:0.01093)0.991:0.02914)1.000:0.05940)0.323:0.02509)0.977:0.02843)0.960:0.02127)0.499:0.01743)0.998:0.03986,(638202197:0.00190,(644970377:0.01516,646623830:0.00752)0.678:0.00364)0.903:0.00939)0.626:0.01605);'
    old_tree_newick = u'(((((646366661:0.00564,((2517129521:0,2556226606:0):0.04302,646777089:0.01412)0.499:0.01173)0.999:0.07494,(638202197:0.0019,(644970377:0.01507,646623830:0.0075)0.738:0.00362)0.872:0.00931)0.635:0.01598,650856936:0.01308)0.995:0.04,(2506713165:0.01171,((640788680:0.07255,650917784:0.03571)0.466:0.02189,(2519842728:0.03945,(646859549:0.04217,(2511672461:0.01668,(640786544:0.03894,(640793336:0.00335,(640165512:0.02038,641283602:0.0019)0.155:0.00211)0.174:0.00496)0.668:0.01095)0.987:0.02908)1.000:0.05973)0.285:0.02499)0.985:0.02844)0.967:0.02183)0.668:0.01914,(((646367708:0.01473,(638201361:0.00188,646622935:0.00578)0.947:0.01381)0.981:0.02381,(2519841469:0.06809,(650856136:0.0182,2506713669:0.02508)0.727:0.0089)0.915:0.01182)0.971:0.03729,((649738338:0.07412,(648151945:0.00339,638155665:0.00581)0.999:0.05834)0.847:0.02714,((650752390:0.11368,(2516847065:0.01707,2520801411:0.03449)0.997:0.04731)0.943:0.033,(640592705:0.14071,637846211:0.11886)0.974:0.04714)0.907:0.02899)0.938:0.03341)0.999:0.06584,(638168675:0.16751,((649738388:0.09339,((((2540854716:0.00327,2553937573:0.00411)1.000:0.1001,(640592823:0.06966,646533023:0.10017)0.945:0.03786)0.999:0.07732,(650872422:0.0565,(650750471:0.05131,(2516847513:0.01447,2520803234:0.02528)0.998:0.05086)0.932:0.03598)0.014:0.01864)0.823:0.03947,(648151981:0.00285,638155700:0.0045)0.891:0.01979)0.998:0.08667)0.999:0.11839,((2540666849:0.25749,(2518907621:0.04615,2555938320:0.04332)0.979:0.0624)1.000:0.20869,(((2515321874:0.27886,((637699780:0.01253,(2540563143:0.01408,(638165755:0.01119,638179449:0.01933)0.559:0.0062)0.973:0.01906)1.000:0.10181,(((2509663319:0.05476,(2515107634:0.07727,2519472088:0.03736)0.549:0.02018)0.970:0.02691,(637960147:0.03454,646706666:0.12022)0.328:0.01393)0.935:0.02456,(2502870849:0.07124,648055573:0.13944)0.419:0.01578)0.997:0.07265)0.976:0.05902)0.992:0.09155,(650797088:0.06773,(639699575:0.03844,2512008957:0.12921)0.700:0.03684)1.000:0.19382)0.774:0.0559,((2518787893:0.1617,(2512783668:0.01562,(2505971857:0.02991,2505968448:0.06931)0.820:0.01544)1.000:0.10132)0.000:0.03917,((640867801:0.07687,(2507462304:0.07769,(((637897753:0.11989,((2509037835:0.14075,(648194984:0.08661,(648195418:0.04254,2506476786:0.04232)0.986:0.0348)0.722:0.0191)0.553:0.00998,((((640115295:0.06424,2540643958:0.01643)1.000:0.05042,2540643737:0.02653)0.542:0.01245,640115052:0.0251)0.986:0.04265,(2507146024:0.06963,2507147269:0.0417)0.641:0.01435)0.611:0.01449)0.989:0.03824)0.424:0.02187,((640099739:0.01547,2525334810:0.02122)0.833:0.00545,(640100248:0.00445,2525335778:0.02442)0.761:0.0023)1.000:0.12528)0.944:0.03961,643570914:0.0938)0.959:0.03885)0.984:0.04627)0.758:0.02972,2509039570:0.05614)1.000:0.19872)0.185:0.04422)0.999:0.13795)0.646:0.06267)0.977:0.07517)0.979:0.05715);'

    old_tree = Tree.get(schema='newick', data=old_tree_newick)
    tree_to_reroot = Tree.get(schema='newick', data=new_tree_newick)
    r = Rerooter()
    new_tree = r.reroot_by_tree(r.reroot(old_tree), r.reroot(tree_to_reroot))

    expected_lefts = old_tree.seed_node.child_nodes()[0].leaf_nodes()
    expected_rights = old_tree.seed_node.child_nodes()[1].leaf_nodes()

    # Collect the observed labels once instead of rebuilding the list for
    # every tip being checked.
    new_root_children = new_tree.seed_node.child_nodes()
    observed_first = set(t.taxon.label for t in new_root_children[0].leaf_nodes())
    observed_second = set(t.taxon.label for t in new_root_children[1].leaf_nodes())

    for tip in expected_lefts:
        self.assertIn(tip.taxon.label, observed_second)
    for tip in expected_rights:
        self.assertIn(tip.taxon.label, observed_first)
    self.assertEqual(len(tree_to_reroot.leaf_nodes()),
                     len(new_tree.leaf_nodes()))
def main():
    """Print, side by side, the node labels of matching RAxML bipartition trees from two directories.

    The two directories come from ``sys.argv[1]`` and ``sys.argv[2]``. Each is
    searched for ``*/RAxML_bipartitions.bipart``; the sorted file lists must
    have the same length, and each pair of files must sit in sub-directories
    of the same name and give the same ``tostr`` result. One header line is
    printed, then one line per pair of node labels.
    """
    d1, d2 = sys.argv[1], sys.argv[2]
    print('og {} {}'.format(basename(d1), basename(d2)))

    d1_files = sorted(glob(join(d1, '*', 'RAxML_bipartitions.bipart')))
    d2_files = sorted(glob(join(d2, '*', 'RAxML_bipartitions.bipart')))
    assert len(d1_files) == len(d2_files)

    for path1, path2 in zip(d1_files, d2_files):
        tree1 = Tree.get(path=path1, schema='newick')
        tree2 = Tree.get(path=path2, schema='newick')
        assert tostr(tree1) == tostr(tree2)

        og1 = basename(dirname(path1))
        og2 = basename(dirname(path2))
        assert og1 == og2

        for label1, label2 in zip(get_node_labels(tree1), get_node_labels(tree2)):
            print(og1, label1, label2)
def run_tact(script_runner, datadir, stem):
    """Run tact_add_taxa and then tact_check_results for the data set named ``stem``.

    The inputs are ``<stem>.backbone.tre`` and ``<stem>.taxonomy.tre`` inside
    ``datadir``. Both commands must exit with status 0. The produced tree is
    dumped to stderr as an ASCII plot. Returns ``(tacted, taxed, bbone)``.
    """
    taxonomy = os.path.join(datadir, stem + ".taxonomy.tre")
    backbone = os.path.join(datadir, stem + ".backbone.tre")
    taxed = Tree.get(path=taxonomy, schema="newick")
    bbone = Tree.get(path=backbone, schema="newick")

    output_prefix = ".tact-pytest-" + stem

    add_taxa_cmd = [
        "tact_add_taxa",
        "--taxonomy", taxonomy,
        "--backbone", backbone,
        "--output", output_prefix,
        "-vv",
    ]
    result = script_runner.run(*add_taxa_cmd)
    assert result.returncode == 0

    output = output_prefix + ".newick.tre"
    tacted = Tree.get(path=output, schema="newick")
    sys.stderr.write(tacted.as_ascii_plot())

    check_cmd = [
        "tact_check_results", output,
        "--taxonomy", taxonomy,
        "--backbone", backbone,
        "--output", output_prefix + ".check.csv",
        "--cores=1",
    ]
    result = script_runner.run(*check_cmd)
    assert result.returncode == 0

    return (tacted, taxed, bbone)
def write_and_read_nexus(filename, header, tree_id, tree_str):
    """Write a NEXUS file holding a single tree and read it back.

    Args:
        filename: path of the file to (over)write.
        header: list of lines written before the tree statement.
        tree_id: identifier placed after the ``tree`` keyword.
        tree_str: remainder of the tree statement.

    Returns:
        The DendroPy tree parsed from ``filename`` with a case-sensitive
        taxon namespace and internal node taxa kept.
    """
    namespace = TaxonNamespace(is_case_sensitive=True)

    # Write a temp file containing the header followed by the tree statement.
    with open(filename, "w") as handle:
        tree_statement = "tree " + tree_id + " " + tree_str
        handle.writelines(line + "\n" for line in header + [tree_statement])

    # Read the file back as a DendroPy tree.
    return Tree.get(
        path=filename,
        schema="nexus",
        taxon_namespace=namespace,
        case_sensitive_taxon_labels=True,
        suppress_internal_node_taxa=False,
    )
def main(OT_filehandle, OTTs_to_keep, outfile):
    """Prune a newick tree to the lineages leading to the requested labels.

    Nodes are visited in postorder. A node is kept when ``node_label_in``
    accepts it or when one of its descendants was kept (signalled by a
    ``keep`` attribute set on the parent); every other node is detached from
    its parent. The pruned tree is written to ``outfile`` as newick.

    Args:
        OT_filehandle: open handle on the newick tree to read.
        OTTs_to_keep: collection handed to ``node_label_in`` to decide
            whether a node is wanted.
        outfile: open handle the pruned tree is written to.
    """
    # Read in tree, but don't create taxa (faster).
    tree = Tree.get(stream=OT_filehandle, schema="newick",
                    suppress_leaf_node_taxa=True)
    for node in tree.postorder_node_iter():
        parent = node.parent_node
        if hasattr(node, 'keep') or node_label_in(node, OTTs_to_keep):
            if parent is not None:  # the root has no parent to flag
                parent.keep = True
        elif parent is not None:
            parent.remove_child(node, suppress_unifurcations=False)
        # else: this is the root and nothing below it was kept. It cannot be
        # detached from anything, so it is left in place instead of failing
        # on ``None.remove_child``.
    tree.write(file=outfile, schema='newick', suppress_leaf_node_labels=False)
def root_tree(f_name, out):
    """Midpoint-root a newick tree and write the result to ``out``.

    Args:
        f_name: path of the newick tree to read (parsed as rooted).
        out: path of the newick file to write; created or overwritten.
    """
    tree = Tree.get(path=f_name, schema="newick", rooting='force-rooted')
    tree.reroot_at_midpoint()
    # ``write(path=...)`` is given the path and handles the file itself, so
    # no separate handle on ``out`` is opened here (the previous extra
    # ``open`` stayed open if writing failed).
    tree.write(path=out, schema="newick", suppress_rooting=True,
               real_value_format_specifier="12.8f")
def get_bipart(ts, species):
    """Return a canonical bipartition string for the node labelled ``#1``.

    One character is produced per entry of ``species``: ``'1'`` when the
    species is a leaf below the ``#1`` node, ``'0'`` when it is elsewhere in
    the tree and ``'?'`` when it is absent from the tree. The string and its
    ``neg``-transformed counterpart are compared and the smaller one is
    returned, so both orientations of a split give the same result.

    Args:
        ts: newick string containing a node labelled ``#1``.
        species: ordered species labels defining the string positions.

    Raises:
        AssertionError: if the string equals its counterpart or the lengths
            do not match ``len(species)``.
    """
    tree = Tree.get(data=ts, schema='newick')
    marked = tree.find_node(lambda nd: nd.label == '#1')
    below_mark = {leaf.taxon.label for leaf in marked.leaf_iter()}
    in_tree = {leaf.taxon.label for leaf in tree.leaf_node_iter()}

    def state(name):
        if name in below_mark:
            return '1'
        if name in in_tree:
            return '0'
        return '?'

    forward = ''.join(state(name) for name in species)
    flipped = ''.join(neg(char) for char in forward)
    assert forward != flipped
    assert len(forward) == len(flipped) and len(forward) == len(species)
    return min(forward, flipped)
def assert_tree_equal_no_labels(self, expected_newick, observed_tree):
    '''Assert two trees are equal once internal labels are ignored.

    Both the tree parsed from ``expected_newick`` (forced rooted) and
    ``observed_tree`` are modified in place: internal node labels are
    cleared and missing internal edge lengths become 0.0. The string forms
    of the sorted trees are then compared. Child ordering is only handled
    to the extent that ``self.sort_tree`` handles it.
    '''
    expected = Tree.get(data=expected_newick, schema='newick',
                        rooting='force-rooted')
    for tree in (expected, observed_tree):
        for internal in tree.internal_nodes():
            internal.label = None
            if internal.edge.length is None:
                internal.edge.length = 0.0
        # Result intentionally unused here; the sorted trees are obtained
        # again for the comparison below.
        self.sort_tree(tree)
    self.assertEqual(str(self.sort_tree(expected)),
                     str(self.sort_tree(observed_tree)))
def test_write_fasttree_newick(self):
    """Check the cleaned newick output for a series of input trees."""
    tc = DendropyTreeCleaner()
    # (input newick, expected cleaned output), checked in this order.
    cases = [
        ("((a,b),(d,e))root;",
         "((a,b),(d,e));\n"),
        # Internal labels should be removed.
        ("((a_2,b)c,(d,e)f)root;",
         "((a_2,b),(d,e));\n"),
        # Quoted spaces should become underscores.
        ("(('a 2',b),(d,e))root;",
         "((a_2,b),(d,e));\n"),
        # Test underscores that are quoted.
        ("(('a_2',b),(d,e))root;",
         "((a_2,b),(d,e));\n"),
        # Test dashes
        ("((ANME-2dV10_01644,b),(d,e))root;",
         "((ANME-2dV10_01644,b),(d,e));\n"),
        # A more real world example with '~' characters (which never mattered actually).
        ("('Asulf_Archaeoglobus.1_2280~2522125074':7.17,(('Afulgi_764~2528311132':0.0,'CP006577_764~2588253768':0.0):0.0,'AE000782_746~638154502':0.0)'s__Archaeoglobus fulgidus':7.555):1.461;\n",
         "(Asulf_Archaeoglobus.1_2280~2522125074:7.17,((Afulgi_764~2528311132:0.0,CP006577_764~2588253768:0.0):0.0,AE000782_746~638154502:0.0):7.555):1.461;\n"),
    ]
    for newick, expected in cases:
        tree = Tree.get(data=newick, schema='newick')
        self.assertEqual(expected, self.clean(tc, tree))
def test_reroot_trifurcated_tree_at_longest_child(self):
    """Reroot three trifurcated trees and compare with the expected trees.

    Each input places the 0.5-long branch on a different root child; the
    expected trees split that branch into two 0.25-long root edges.
    """
    input_newicks = [
        u'(A:0.1,B:0.2,(C:0.3,D:0.4):0.5);',
        u'(A:0.5,B:0.2,(C:0.3,D:0.4):0.1);',
        u'(A:0.2,B:0.5,(C:0.3,D:0.4):0.1);',
    ]
    expected_newicks = [
        u"((C:0.3,D:0.4):0.25,(A:0.1,B:0.2):0.25);",
        u"(A:0.25,(B:0.2,(C:0.3,D:0.4):0.1):0.25);",
        u"(B:0.25,(A:0.2,(C:0.3,D:0.4):0.1):0.25);",
    ]
    # Parse, build expectations and reroot in three separate passes.
    test_trees = [Tree.get(schema='newick', data=nw) for nw in input_newicks]
    expected = [str(Tree.get(schema='newick', data=nw))
                for nw in expected_newicks]
    rerooted = [str(Rerooter().reroot(tree)).strip() for tree in test_trees]

    for observed, wanted in zip(rerooted, expected):
        self.assertEqual(observed, wanted)
def runProgram(referenceTreeFile, sampleTreeList, bootstrap_cutoff_value=80, output_tree="output_tree.tre", verbose=False, quiet=False, timing=False):
    """Label a reference tree with quartet support from sample trees.

    Reads the reference tree, reads the sample trees into the reference
    tree's taxon namespace, builds the quartet support dictionary with
    ``buildFullSupport`` and passes it to ``buildLabeledTree``.

    Args:
        referenceTreeFile: path of the newick reference tree.
        sampleTreeList: sample tree input forwarded to ``readTrees``.
        bootstrap_cutoff_value: cutoff forwarded to ``buildFullSupport``.
        output_tree: output path forwarded to ``buildLabeledTree``.
        verbose: print the inputs and the full quartet dictionary.
        quiet: forwarded to the helper functions.
        timing: forwarded to the helper functions; when set, verbose output
            is switched off after the initial parameter dump.

    Returns:
        None. Returns early, after printing a message, when a sample tree
        uses taxon labels that are missing from the reference tree.

    Raises:
        SystemExit: if the reference tree cannot be read.
    """
    if verbose:
        print("Reference Tree: ", referenceTreeFile)
        print("Sample Tree List: ", sampleTreeList)
        print("Bootstrap Cutoff Value: ", bootstrap_cutoff_value)
        print("Output Tree File: ", output_tree)
    if timing:
        verbose = False

    try:
        reference_tree = Tree.get(path=referenceTreeFile, schema="newick",
                                  preserve_underscores=True)
    except Exception:
        # Only real errors are reported as a bad input file; Ctrl-C and
        # SystemExit are no longer swallowed as the bare ``except`` did.
        print(
            "Error with file '{}': please only use files with newick tree format"
            .format(referenceTreeFile))
        sys.exit()

    reference_tree_namespace = reference_tree.taxon_namespace
    sample_tree_list = readTrees(sampleTreeList, reference_tree_namespace,
                                 quiet)

    # Check if gene tree taxon namespace matches reference tree
    for sample_tree in sample_tree_list:
        if not reference_tree_namespace.has_taxa_labels(
                sample_tree.taxon_namespace.labels()):
            print(
                'Error: reference tree is of a different taxon namespace as the sample trees'
            )
            return

    full_quartet_dictionary = buildFullSupport(sample_tree_list,
                                               bootstrap_cutoff_value,
                                               verbose, quiet, timing)
    if verbose:
        print("Full quartet dictionary with support values")
        for quartet in full_quartet_dictionary:
            print(quartet, full_quartet_dictionary[quartet])
        print()

    buildLabeledTree(referenceTreeFile, full_quartet_dictionary, output_tree,
                     quiet, timing)
def summary_to_nw_str(mcmc_tree_filename):
    """Summarise an MCMC tree file and return the summary tree as newick.

    ``calc_summary_tree`` is run first; the file
    ``<mcmc_tree_filename>_summary.tree`` is then read as NEXUS with
    case-sensitive taxon labels.

    Returns:
        The newick string without annotations, minus its first five
        characters and any trailing newlines.
    """
    calc_summary_tree(mcmc_tree_filename)

    # Convert summary nexus tree to newick for ete3.
    namespace = TaxonNamespace(is_case_sensitive=True)
    summary_path = mcmc_tree_filename + "_summary.tree"
    summary_tree = Tree.get(
        path=summary_path,
        schema="nexus",
        taxon_namespace=namespace,
        case_sensitive_taxon_labels=True,
        suppress_internal_node_taxa=False,
    )

    # Drop all annotations and illegal characters.
    # NOTE(review): the 5-character prefix is presumably DendroPy's rooting
    # token (e.g. "[&R] ") - confirm.
    newick = summary_tree.as_string('newick', suppress_annotations=True)
    return newick[5:].rstrip("\n")
def test_joel_bug(self):
    """Reroot a 70-OTU tree using a rerooted 67-OTU tree as reference.

    Every tip on one side of the reference root must end up on the opposite
    root child of the rerooted tree (index 0 <-> index 1), and rerooting
    must not change the number of leaves.
    """
    tree67 = u'''[ Thu Sep 10 15:55:28 2015: Loaded from /srv/projects/graftm/testing_files/testing_graftM/tmp_01_decorate/67_otus.tree Thu Sep 10 15:56:18 2015: tree_67_otus saved to /srv/projects/graftm/testing_files/testing_graftM/tmp_01_decorate/67_otus.rerooted.tree ] ((((1928988:0.10866,2909029:0.15809):0.03546,((801940:0.10703,(3825327:0.12686,4298210:0.09398):0.07480):0.02560,729293:0.21465):0.01982):0.02058,((426860:0.16275,219508:0.12556):0.02403,((1128285:0.06200,4455990:0.07954):0.07525,(815912:0.12348,(3770699:0.23707,823009:0.09955):0.04225):0.01489):0.01849):0.01531):0.09184,(((2361381:0.22741,(3779572:0.06720,4363260:0.07438):0.01460):0.04187,(((((((734152:0.13251,4091454:0.12251):0.03552,((576962:0.14097,(1145804:0.14124,3106714:0.14895):0.01964):0.01668,(2014493:0.15560,(3192744:0.11018,(202294:0.07263,1138804:0.08032):0.05015):0.01277):0.01187):0.01016):0.01486,4323734:0.15004):0.00053,(759363:0.05430,4459468:0.04835):0.03216):0.01531,4322265:0.12041):0.01024,(4391683:0.11058,(229854:0.07735,(4336814:0.09937,((150571:0.07911,2730777:0.10930):0.04404,((4042859:0.25381,(717487:0.13914,4363563:0.19585):0.02281):0.02587,(((3190878:0.16480,4452949:0.07312):0.05029,(4015030:0.10339,(4438491:0.04779,(2286116:0.08699,(4251079:0.03657,4349225:0.02256):0.01189):0.01091):0.04963):0.01748):0.02917,(3014179:0.16455,(2170497:0.16101,(2107103:0.22406,951205:0.11633):0.02436):0.02574):0.03041):0.01561):0.02862):0.02589):0.01914):0.01811):0.01347):0.01451,((182569:0.14758,4363259:0.07793):0.04894,696036:0.14901):0.01514):0.01624):0.02659,(3761685:0.11278,4423155:0.16503):0.03965):0.09184); '''
    tree70 = u'((4423550:0.17275,((4091454:0.108,4427993:0.1045)50:0.01575,((123662:0.06599,(3269889:0.12737,(104534:0.06041,734152:0.09136)20:0.00526)80:0.01669)90:0.01398,(300695:0.10755,225636:0.1317)100:0.0405)0:0.01073)40:0.0128)20:0.00782,(4377103:0.09243,((172946:0.08097,1145804:0.08645)100:0.02986,(1941303:0.0953,4332975:0.09505)90:0.00838)100:0.02206)90:0.0272,((((1931714:0.07012,(4322265:0.10071,4343117:0.13235)100:0.01842)100:0.03116,(((759363:0.05402,4459468:0.0433)100:0.02405,(294612:0.14484,2679839:0.1009)90:0.02132)70:0.01331,((((((730039:0.15444,((4015030:0.11176,(4438491:0.04568,(4349225:0.02406,(2286116:0.08501,(4251079:0.02026,4386156:0.01582)80:0.01016)40:0.0097)80:0.0168)100:0.03826)50:0.01397,(4308961:0.10766,4452949:0.05355)90:0.06215)40:0.01455)50:0.01325,(((1718272:0.12738,(150571:0.08502,(699249:0.03117,2730777:0.03253)100:0.06302)70:0.02174)60:0.03847,(((2107103:0.20025,3190878:0.14435)40:0.03601,(1824285:0.10892,3014179:0.14706)30:0.02039)0:0.01309,((3366304:0.09202,951205:0.07509)100:0.05732,2170497:0.16332)90:0.02722)10:0.01937)0:0.01868,(3064426:0.20791,((1837676:0.14477,(4363563:0.14803,4479774:0.10823)90:0.04638)90:0.03766,(4042859:0.2295,717487:0.15674)40:0.01749)20:0.01416)0:0.01063)0:0.03387)100:0.04795,4336814:0.08037)0:0.02958,(346735:0.11193,4391683:0.07639)60:0.00894)0:0.01312,1142178:0.07594)0:0.01881,(229854:0.0646,4460175:0.09289)90:0.02422)20:0.01731)0:0.01339)0:0.00777,(((2984017:0.05634,4340384:0.07722)80:0.03016,(((4371218:0.13005,(1133483:0.08797,3106714:0.09717)90:0.02053)80:0.02174,(3256066:0.08328,4022282:0.11841)90:0.03619)100:0.03392,((202294:0.06795,1138804:0.07777)100:0.05296,(3192744:0.09608,(2014493:0.11684,(180127:0.06532,4417185:0.0713)100:0.03824)100:0.0368)40:0.01663)70:0.00787)50:0.01733)10:0.0083,(222095:0.1391,(288404:0.13004,(4323734:0.07601,4446882:0.06844)60:0.01661)100:0.02863)40:0.01639)0:0.00846)0:0.0135,(((((1133369:0.07769,4336154:0.07979)100:0.11778,(((708774:0.0822,((114724:0.047,82092:0.04936)100:0.11526,(201206:0.10329,4423155:0.14181)60:0.03138)40:0.01886)80:0.03209,(202302:0.11673,3761685:0.09059)100:0.02325)90:0.02946,(((576962:0.11188,202459:0.09918)90:0.033,(213358:0.0989,(3390949:0.09853,3726184:0.09836)90:0.03298)90:0.02315)20:0.01425,202949:0.15903)0:0.01188)20:0.02709)10:0.01609,((4323100:0.0982,4409929:0.10612)60:0.01386,((696036:0.11283,(203529:0.18615,202449:0.08377)10:0.02209)30:0.02916,((2361381:0.18808,203220:0.10905)100:0.04166,(4363260:0.07208,(3779572:0.04977,114015:0.13268)70:0.02151)70:0.01055)100:0.04229)0:0.01717)0:0.01634)0:0.00519,(539547:0.12233,(4409453:0.14784,(4363259:0.05689,((268769:0.0594,266521:0.05311)100:0.04977,(182569:0.10314,4463866:0.07165)70:0.01505)100:0.04024)80:0.01602)100:0.05088)20:0.02162)0:0.0112,((573196:0.11279,((((3825327:0.11767,4298210:0.09472)100:0.07495,(836195:0.11165,801940:0.09002)100:0.02232)90:0.0347,((1928988:0.1129,(1129716:0.13293,2909029:0.13959)50:0.01858)70:0.02572,(((815912:0.12176,((219508:0.13512,(426860:0.12643,(202758:0.04748,4344033:0.03692)100:0.11429)90:0.0487)20:0.00791,((823117:0.10669,823009:0.0888)90:0.0381,3770699:0.24911)50:0.02136)40:0.02309)30:0.01326,(4455990:0.05381,(1128285:0.06585,4271527:0.03794)70:0.02727)100:0.06911)10:0.01546,4097115:0.09311)30:0.02142)20:0.01039)20:0.02855,(729293:0.18117,3871866:0.11553)90:0.03599)100:0.15854)20:0.02836,150700:0.13922)20:0.02787)0:0.00717)0:0.00859)100;'

    old_tree = Tree.get(schema='newick', data=tree67)
    tree_to_reroot = Tree.get(schema='newick', data=tree70)
    new_tree = Rerooter().reroot_by_tree(old_tree, tree_to_reroot)

    def labels_below_root_child(tree, child_index):
        side = tree.seed_node.child_nodes()[child_index]
        return [leaf.taxon.label for leaf in side.leaf_nodes()]

    # The sides are expected to be swapped: tips under reference child 0
    # appear under new child 1 and vice versa.
    for reference_side, rerooted_side in ((0, 1), (1, 0)):
        rerooted_labels = labels_below_root_child(new_tree, rerooted_side)
        for label in labels_below_root_child(old_tree, reference_side):
            self.assertTrue(label in rerooted_labels)

    self.assertEqual(len(tree_to_reroot.leaf_nodes()),
                     len(new_tree.leaf_nodes()))
def generate_ATT_from_phylesystem(aln, workdir, study_id, tree_id, phylesystem_loc='api'):
    """Gather tree, alignment and study info into an AlignTreeTax object.

    The study's NexSON is fetched with ``get_nexson``, the tree ``tree_id``
    is extracted as newick using the original tip labels, and taxon labels
    in the alignment's namespace are replaced by otu ids.

    Parameters
    ----------
    aln : DnaCharacterMatrix
        Alignment; must be a DendroPy DNA character matrix. Its taxon
        labels are modified in place.
    workdir : str
        Passed through to ``AlignTreeTax``.
    study_id, tree_id
        Identify the study and the tree within it.
    phylesystem_loc : str
        Passed to ``get_nexson``.

    Returns
    -------
    AlignTreeTax built from the relabelled newick, the otu dictionary and
    the alignment.
    """
    #TODO CHECK ARGS
    assert(isinstance(aln, datamodel.charmatrixmodel.DnaCharacterMatrix))
    for tax in aln.taxon_namespace:
        tax.label = tax.label.replace(" ", "_") #Forcing all spaces to underscore UGH
    nexson = get_nexson(study_id, phylesystem_loc)
    ott_ids = get_subtree_otus(nexson, tree_id=tree_id, subtree_id="ingroup", return_format="ottid")
    ott_mrca = get_mrca_ott(ott_ids)
    newick = extract_tree(nexson, tree_id, PhyloSchema('newick', output_nexml2json='1.2.1', content="tree", tip_label="ot:originalLabel"))
    newick = newick.replace(" ", "_") #UGH Very heavy handed, need to make sure happens on alignement side as well.
    # The tree shares the alignment's taxon namespace, so the relabelling
    # further down changes the labels seen through ``tre`` as well.
    tre = Tree.get(data=newick, schema="newick", preserve_underscores=True, taxon_namespace=aln.taxon_namespace)
    otus = get_subtree_otus(nexson, tree_id=tree_id)
    otu_dict = {}
    orig_lab_to_otu = {}
    # NOTE(review): treed_taxa is filled below but not read again in this function.
    treed_taxa = {}
    for otu_id in otus:
        otu_dict[otu_id] = extract_otu_nexson(nexson, otu_id)[otu_id]
        otu_dict[otu_id]['^physcraper:status'] = "original"
        otu_dict[otu_id]['^physcraper:last_blasted'] = "1900/01/01"
        # Same space-to-underscore normalisation as applied to the labels above.
        orig = otu_dict[otu_id].get(u'^ot:originalLabel').replace(" ", "_")
        orig_lab_to_otu[orig] = otu_id
        treed_taxa[orig] = otu_dict[otu_id].get(u'^ot:ottId')
    for tax in aln.taxon_namespace:
        try:
            tax.label = orig_lab_to_otu[tax.label].encode('ascii')
        except KeyError:
            # Only a warning is written here; the taxon itself is left untouched by this function.
            sys.stderr.write("{} doesn't have an otu id. It is being removed from the alignement. This may indicate a mismatch between tree and alignement\n".format(tax.label))
    #need to prune tree to seqs and seqs to tree...
    otu_newick = tre.as_string(schema="newick")
    return AlignTreeTax(otu_newick, otu_dict, aln, ingroup_mrca=ott_mrca, workdir=workdir) #newick should be bare, but alignement should be DNACharacterMatrix
def read_matrix_and_tree(char_file_path, tree_file_path, char_type=DnaCharacterMatrix, char_schema='fasta', tree_schema='newick'):
    """Read a character matrix (optional) and a tree sharing its taxa.

    When ``char_file_path`` is given, the matrix is read first and its taxon
    namespace is made immutable before being handed to the tree reader.
    When it is falsy, no matrix is read and the tree is read with
    ``taxon_namespace=None``.

    Returns:
        Tuple ``(matrix_or_None, tree)``.
    """
    matrix = None
    namespace = None
    if char_file_path:
        matrix = char_type.get(path=char_file_path, schema=char_schema)
        namespace = matrix.taxon_namespace
        namespace.is_mutable = False
    tree = Tree.get(
        path=tree_file_path,
        schema=tree_schema,
        preserve_underscores=True,
        taxon_namespace=namespace,
    )
    return matrix, tree
def test_input_unrooted_tree(self):
    """Create a package from an unrooted tree and check its leaf count.

    The package is built in a temporary directory from the 61-OTU data set
    with sequence 4459468 pruned; the resulting reference tree must hold
    21 leaves.
    """
    otu61 = os.path.join(path_to_data, '61_otus.gpkg', '61_otus.refpkg')
    with tempdir.TempDir() as tmp:
        Create(prerequisites).main(
            taxtastic_taxonomy=os.path.join(otu61, '61_otus_taxonomy.csv'),
            taxtastic_seqinfo=os.path.join(otu61, '61_otus_seqinfo.csv'),
            # created with newick_utils:
            # nw_prune test/data/61_otus.gpkg/61_otus.refpkg/61_otus.tre 4459468 >test/data/61_otus.without_4459468.tre
            unrooted_tree=os.path.join(path_to_data, 'create', '61_otus.without_4459468.tre'),
            sequences=os.path.join(path_to_data, 'create', '61_otus.without_4459468.fasta'),
            alignment=os.path.join(path_to_data, 'create', '61_otus.without_4459468.aln.fasta'),
            prefix=tmp,
            force=True)
        gpkg = GraftMPackage.acquire(tmp)
        # Read the first line of the tree file and close the handle
        # right away instead of leaving it to the garbage collector.
        with open(gpkg.reference_package_tree_path()) as tree_file:
            tree = Tree.get(schema='newick', data=tree_file.readline())
        self.assertEqual(21, len(tree.leaf_nodes()))
def generate_streamed_alignment(self):
    """Run the key steps, then replace the tree and alignment with the expanded ones.

    Reads the blast results and, when new sequences remain after removing
    identical ones, aligns and places them, estimates a full tree and loads
    it into ``self.data.tre``. The blast directory and the RAxML/papara
    output of this round are then archived under ``<workdir>/previous_run``
    and the new-sequence bookkeeping is reset.

    ``self.repeat`` is set to 1 when the tree was updated and to 0 when no
    new sequences were found or none survived filtering. The object itself
    and ``self.data.otu_dict`` are pickled into the working directory.
    """
    def dump_pickle(obj, name):
        # Close the pickle file deterministically instead of leaking the handle.
        with open('{}/{}'.format(self.workdir, name), 'wb') as handle:
            pickle.dump(obj, handle)

    self.read_blast()
    dump_pickle(self, 'scrape.p')
    if len(self.new_seqs) > 0:
        self.remove_identical_seqs()
        self.data.write_files() #should happen before aligning in case of pruning
        if len(self.new_seqs_otu_id) > 0:#TODO rename to something more intutitive
            self.write_query_seqs()
            self.align_query_seqs()
            self.data.reconcile()
            self.place_query_seqs()
            self.est_full_tree()
            self.data.tre = Tree.get(path="{}/RAxML_bestTree.{}".format(self.workdir, self.date),
                                     schema="newick",
                                     preserve_underscores=True,
                                     taxon_namespace=self.data.aln.taxon_namespace)
            self.data.write_files()

            previous_run = "{}/previous_run".format(self.workdir)
            if os.path.exists(previous_run):
                # Move the older archive aside under a name that is free.
                prev_dir = "{}/previous_run{}".format(self.workdir, self.date)
                i = 0
                while os.path.exists(prev_dir):
                    i += 1
                    # Stay inside the working directory; the bare
                    # "previous_runN" used before was resolved against the
                    # current directory instead.
                    prev_dir = "{}/previous_run{}".format(self.workdir, i)
                os.rename(previous_run, prev_dir)
            os.rename(self.blast_subdir, previous_run)
            if os.path.exists("{}/last_completed_update".format(self.workdir)):
                os.rename(self.tmpfi, "{}/last_completed_update".format(self.workdir))
            # Archive this round's program output. basename() gives the
            # file name whatever the depth of workdir, unlike split("/")[1].
            for filename in glob.glob('{}/RAxML*'.format(self.workdir)):
                os.rename(filename, "{}/{}".format(previous_run, os.path.basename(filename)))
            for filename in glob.glob('{}/papara*'.format(self.workdir)):
                os.rename(filename, "{}/{}".format(previous_run, os.path.basename(filename)))
            os.rename("{}/{}".format(self.workdir, self.newseqs_file),
                      "{}/newseqs.fasta".format(previous_run))
            self.data.write_labelled()
            self.new_seqs = {} #Wipe for next run
            self.new_seqs_otu_id = {}
            self.repeat = 1
        else:
            sys.stdout.write("No new sequences after filtering.\n")
            self.repeat = 0
    else:
        sys.stdout.write("No new sequences found.\n")
        self.repeat = 0
    self.reset_markers()
    dump_pickle(self, 'scrape.p')
    dump_pickle(self.data.otu_dict, 'otu_dict.p')
def __init__(self, newick, otu_dict, alignment, ingroup_mrca, workdir):
    """Bundle an alignment, its tree and the otu metadata.

    Parameters
    ----------
    newick : str
        Newick string of the tree; parsed into the alignment's taxon
        namespace and also kept verbatim as ``orig_newick``.
    otu_dict : dict
        Stored as ``self.otu_dict``.
    alignment
        Stored as ``self.aln`` and ``self.orig_aln``; must expose
        ``taxon_namespace``.
    ingroup_mrca
        Stored as ``self.ott_mrca``; must convert to a non-zero int.
    workdir : str
        Working directory; created when it does not exist.
    """
    self.aln = alignment
    # Tree and alignment share one taxon namespace.
    self.tre = Tree.get(data=newick, schema="newick", preserve_underscores=True, taxon_namespace=self.aln.taxon_namespace)
    self.otu_dict = otu_dict
    self.ps_otu = 1 #iterator for new otu IDs
    # Called before the remaining attributes are set - keep this ordering.
    self._reconcile_names()
    self.workdir = workdir #TODO - is this where the workdir should live?
    if not os.path.exists(self.workdir):
        os.makedirs(self.workdir)
    # Fails for values that do not convert to int and also for 0.
    assert int(ingroup_mrca)
    self.ott_mrca = ingroup_mrca
    self.orig_seqlen = [] #FIXME
    self.gi_dict = {}
    self.orig_aln = alignment
    self.orig_newick = newick
def mutable_read_matrix_and_tree(char_file_path, tree_file_path, char_type=DnaCharacterMatrix, char_schema='fasta', tree_schema='newick'):
    '''Read a character matrix and a tree into one mutable taxon namespace.

    Because the namespace stays mutable, the tree can be read even when its
    labels differ from those of the matrix. Without a ``char_file_path``
    nothing is read and ``(None, None)`` is returned.
    '''
    if not char_file_path:
        return None, None

    char_mat = char_type.get(path=char_file_path, schema=char_schema)
    shared_namespace = char_mat.taxon_namespace
    # Allow the tree reader to add taxa the matrix does not know about.
    shared_namespace.is_mutable = True
    tree = Tree.get(
        path=tree_file_path,
        schema=tree_schema,
        preserve_underscores=True,
        taxon_namespace=shared_namespace,
    )
    return char_mat, tree
def test_reroot_trifurcated_tree_at_longest_child(self):
    """Rerooting a trifurcated tree must split its 0.5-long root branch.

    Three inputs are used, each with the long branch on a different child
    of the root; the expected trees carry two 0.25-long root edges.
    """
    # (tree to reroot, expected rerooted tree)
    cases = (
        (u'(A:0.1,B:0.2,(C:0.3,D:0.4):0.5);',
         u"((C:0.3,D:0.4):0.25,(A:0.1,B:0.2):0.25);"),
        (u'(A:0.5,B:0.2,(C:0.3,D:0.4):0.1);',
         u"(A:0.25,(B:0.2,(C:0.3,D:0.4):0.1):0.25);"),
        (u'(A:0.2,B:0.5,(C:0.3,D:0.4):0.1);',
         u"(B:0.25,(A:0.2,(C:0.3,D:0.4):0.1):0.25);"),
    )
    # Stage 1: parse every input; stage 2: expected strings;
    # stage 3: reroot; stage 4: compare.
    parsed = [Tree.get(schema='newick', data=source) for source, _ in cases]
    wanted = [str(Tree.get(schema='newick', data=target))
              for _, target in cases]
    observed = [str(Rerooter().reroot(tree)).strip() for tree in parsed]

    for rerooted, expected in zip(observed, wanted):
        self.assertEqual(rerooted, expected)
def place_query_seqs(self):
    """Place the new query sequences on the tree with RAxML.

    Runs ``raxmlHPC -f v`` inside ``self.workdir`` on the combined
    alignment, reads the labelled placement tree, resolves its polytomies,
    strips the ``QUERY___`` prefix from query labels and writes the result
    to ``place_resolve.tre``. Just for placement, to use as starting tree.

    The working directory of the process is changed to ``self.workdir``
    while this runs and is restored afterwards, also on errors.
    """
    if os.path.exists("RAxML_labelledTree.PLACE"):
        # Move a stale result out of the way. (The source name used to be
        # an undefined variable, raising NameError.)
        os.rename("RAxML_labelledTree.PLACE", "RAxML_labelledTreePLACE.tmp")
    sys.stdout.write("placing query sequences \n")
    start_dir = os.getcwd()
    os.chdir(self.workdir)
    try:
        subprocess.call(["raxmlHPC", "-m", "GTRCAT",
                         "-f", "v",
                         "-s", "papara_alignment.extended",
                         "-t", "random_resolve.tre",
                         "-n", "PLACE"])
        placetre = Tree.get(path="RAxML_labelledTree.PLACE",
                            schema="newick",
                            preserve_underscores=True)
        placetre.resolve_polytomies()
        for taxon in placetre.taxon_namespace:
            if taxon.label.startswith("QUERY"):
                taxon.label = taxon.label.replace("QUERY___", "")
        placetre.write(path="place_resolve.tre", schema="newick", unquoted_underscores=True)
    finally:
        # Go back to where we started; chdir('..') was only right when
        # workdir was a direct child of the starting directory.
        os.chdir(start_dir)
    self._query_seqs_placed = 1
def write_labelled(self, label='^ot:ottTaxonName', treepath="labelled.tre", alnpath="labelled.fas"):
    """Write the tree and alignment with human readable labels.

    Works on copies made by round-tripping the tree through newick and the
    alignment through fasta, so ``self.tre`` and ``self.aln`` keep their
    labels. NOT MEMORY EFFICIENT AT ALL.

    For each taxon the new label is the first non-empty value among the
    requested ``label`` key, ``^ot:originalLabel`` and ``"ncbi <id>"`` built
    from ``^ncbi:taxon``; a taxon with none of these keeps its label. A
    label that was already handed out gets the old label appended to stay
    unique.

    Args:
        label: otu_dict key to label with; one of ``^ot:ottTaxonName``,
            ``^ot:originalLabel``, ``^ot:ottId`` or ``^ncbi:taxon``.
        treepath: tree file name, relative to ``self.workdir``.
        alnpath: alignment file name, relative to ``self.workdir``.
    """
    assert label in ['^ot:ottTaxonName', "^ot:originalLabel", "^ot:ottId", "^ncbi:taxon"]
    tmp_newick = self.tre.as_string(schema="newick")
    labelled_tree = Tree.get(data=tmp_newick,
                             schema="newick",
                             preserve_underscores=True)
    tmp_fasta = self.aln.as_string(schema="fasta")
    # Reading into the copy's namespace makes the relabelling below apply
    # to the alignment copy as well.
    labelled_aln = DnaCharacterMatrix.get(data=tmp_fasta,
                                          schema="fasta",
                                          taxon_namespace=labelled_tree.taxon_namespace)

    used_labels = set()
    for taxon in labelled_tree.taxon_namespace:
        otu = self.otu_dict[taxon.label]
        candidate = otu.get(label)
        if not candidate:
            candidate = otu.get("^ot:originalLabel")
        if not candidate:
            ncbi_id = otu.get("^ncbi:taxon")
            if not ncbi_id:
                # Nothing to relabel with: keep the current label.
                continue
            candidate = " ".join(["ncbi", str(ncbi_id)])
        if candidate in used_labels:
            candidate = " ".join([candidate, taxon.label])
        used_labels.add(candidate)
        taxon.label = candidate

    labelled_tree.write(path="{}/{}".format(self.workdir, treepath),
                        schema="newick",
                        unquoted_underscores=True,
                        suppress_edge_lengths=False)
    labelled_aln.write(path="{}/{}".format(self.workdir, alnpath),
                       schema="fasta")
def test_write_fasttree_newick(self):
    """Check ``write_fasttree_newick`` output for a series of input trees."""
    tc = DendropyTreeCleaner()

    def written(newick):
        # Parse the newick, write it through the cleaner, return the text.
        tree = Tree.get(data=newick, schema='newick')
        buf = StringIO()
        tc.write_fasttree_newick(tree, buf)
        return buf.getvalue()

    self.assertEqual("((a,b),(d,e));\n",
                     written("((a,b),(d,e))root;"))

    # Internal labels should be removed.
    self.assertEqual("((a_2,b),(d,e));\n",
                     written("((a_2,b)c,(d,e)f)root;"))

    # Quoted spaces should become underscores.
    self.assertEqual("((a_2,b),(d,e));\n",
                     written("(('a 2',b),(d,e))root;"))

    # Test underscores that are quoted.
    self.assertEqual("((a_2,b),(d,e));\n",
                     written("(('a_2',b),(d,e))root;"))

    # Test dashes
    self.assertEqual("((ANME-2dV10_01644,b),(d,e));\n",
                     written("((ANME-2dV10_01644,b),(d,e))root;"))

    # A more real world example with '~' characters (which never mattered actually).
    self.assertEqual(
        "(Asulf_Archaeoglobus.1_2280~2522125074:7.17,((Afulgi_764~2528311132:0.0,CP006577_764~2588253768:0.0):0.0,AE000782_746~638154502:0.0):7.555):1.461;\n",
        written(u"('Asulf_Archaeoglobus.1_2280~2522125074':7.17,(('Afulgi_764~2528311132':0.0,'CP006577_764~2588253768':0.0):0.0,'AE000782_746~638154502':0.0)'s__Archaeoglobus fulgidus':7.555):1.461;\n"))
def warn(*objs): print(*objs, file=sys.stderr) #construct dict of OTTid:PopularityMetric popularity = {}; tsvin = csv.DictReader(args.popularity_file, delimiter='\t') viewcols = [col for col in tsvin.fieldnames if 'pagecounts' in col] for row in tsvin: try: views = [int(row[col]) for col in viewcols if row[col] and row[col].isdigit()] trMeanViews = mean(sorted(views)[:-2]) popularity[row["OTTid"]] = (float(row["page_size"]) * trMeanViews)**0.5 #take the sqrt transform except (StatisticsError, ValueError): #perhaps data is absent, a number is NA or we are trying to take a mean of an empty list - if so, ignore pass; tree = Tree.get(file=args.intree, schema='newick', suppress_edge_lengths=True, preserve_underscores=True, suppress_leaf_node_taxa=True) #put popularity as edge length for node in tree.preorder_node_iter(): if node.label in args.exclude: node.edge_length = 0 else: try: node.edge_length = popularity[node.label.rsplit("_ott",1)[1]] except (LookupError, AttributeError): node.edge_length = 0 #go up the tree from the tips, summing up the popularity indices beneath if args.branch_length in ['sum_descendant_popularities', 'sum_ancestor_and_descendant_popularities']: for node in tree.postorder_node_iter(): if node.is_leaf():
def node_label_method(tree, outgroup):
    '''Interpret node labels as node attributes (default).

    Reseeds ``tree`` in place at the parent of the leaf whose taxon label
    is ``outgroup`` and returns the same tree.
    '''
    outgroup_node = tree.find_node_with_taxon_label(outgroup)
    new_root = outgroup_node.parent_node
    tree.reseed_at(new_root)
    return tree


def rooted_bipartition_method(tree, outgroup):
    '''Interpret node labels as branch support values.

    Support values are recorded per bipartition before rerooting and put
    back on the nodes afterwards, so a value stays with its split rather
    than with its node. Nodes without a label count as support 1.0;
    bipartitions that only exist after rerooting get "not_specified".

    Reseeds ``tree`` in place at the parent of the ``outgroup`` leaf,
    clears the length of the new root edge and returns the same tree.
    '''
    tree.encode_bipartitions()
    support_values = {}
    for nd in tree:
        support_values[nd.bipartition] = float(nd.label) if nd.label is not None else 1.0
    outgroup_node = tree.find_node_with_taxon_label(outgroup)
    new_root = outgroup_node.parent_node
    tree.reseed_at(new_root)
    # Bipartitions changed with the new root; recompute before the lookup.
    tree.encode_bipartitions()
    for nd in tree:
        nd.label = support_values.get(nd.bipartition, "not_specified")
    tree.seed_node.edge.length = None
    return tree


# Close the input file as soon as the tree has been parsed.
with open('test.nw') as handle:
    tree = Tree.get(file=handle, schema="newick")#, rooting="force-rooted")
rooted_bipartition_method(tree, 'X')
nw = tree.as_string(schema='newick').strip()
# Parenthesised single-argument form prints the same under Python 2 and 3.
print(nw.replace('[&R] ', ''))
# Related discussion: https://github.com/jeetsukumaran/DendroPy/issues/53
stops.append(len(seq.values())) stops.sort() stop = stops[int(len(stops)/2)] d = {} for taxon, seq in orig_seq.items(): d[str(taxon.label)] = seq.values()[:stop] dna_orig = DnaCharacterMatrix.from_dict(d) dna_taxa = [i for i in dna_orig.taxon_namespace] tre_orig = Tree.get(path = "{}_random_resolve.tre".format("ascomycota"), schema = "newick",taxon_namespace=dna_orig.taxon_namespace) "" treed_taxa = [i.taxon for i in tre_orig.leaf_nodes()] tre_orig.prune_taxa(set(treed_taxa) - set(dna_taxa)) for taxon in set(dna_taxa) - set(treed_taxa): del d[taxon.label] #####NEXT STEPS!!! #make a function that doe sthis dumb shit in orig as well dna_orig = DnaCharacterMatrix.from_dict(d) tre_orig.write(path = "{}_orig_cut.tre".format(runname), schema = "newick", unquoted_underscores=True, suppress_edge_lengths=True)
else: onlyfiles.append(AllFiles[j]) for j in range(len(onlyfiles)): try: path = file_path+'/'+onlyfiles[j] fil = open('/home/4/u1we1f44/Documents/appbio15/projekt/data/'+path, 'r') lines_list=fil.readlines() fil.close() test = SeqDic(lines_list) # If this dose not worke we do not have a FASTA file ## # Makes a newick tree and checks if the referense tree is recovered. The none reducing file. ## line = 'cat /home/4/u1we1f44/Documents/appbio15/projekt/data/'+path+' | fastprot -I fasta -O phylip | fnj -I phylip -O "newick" -o "Treeout.txt"' os.system(line) TreePath=file_path+'/'+RefTree t1=Tree.get(file=open('/home/4/u1we1f44/Documents/appbio15/projekt/data/'+TreePath,'r'),schema="newick",tree_offset=0) t2=Tree.get(file=open('/home/4/u1we1f44/Documents/appbio15/projekt/src/Treeout.txt','r'),schema="newick",tree_offset=0,taxon_namespace=t1.taxon_namespace) t1.encode_bipartitions() t2.encode_bipartitions() if treecompare.symmetric_difference(t1, t2)==0: NotFixedCount += 1 os.remove('/home/4/u1we1f44/Documents/appbio15/projekt/src/Treeout.txt') Total += 1 else: Total += 1 os.remove('/home/4/u1we1f44/Documents/appbio15/projekt/src/Treeout.txt') ## # Makes a temporary file. In the temporary file with data with the nosie columns remoeved. MAkes a newick tree and checks if the refernse tree is recovered. # The nosie columns removed. ## os.system("touch temp.fa")
def build_subsets_tree(self, curr_tmp_dir_par,build_min_tree=True):
    """Build the spanning tree that connects the alignment subsets.

    With ``build_min_tree`` set, leaves are grouped by the temporary
    directory of their subset job and ``build_groups_MST`` builds the tree;
    ``self.pasta_team.subsets`` is replaced by a group-name -> job mapping.

    Otherwise a heuristic is used on a fresh copy of the tree parsed from
    ``self.tree_str``: leaves are tagged with their subset job, internal
    nodes get the intersection (or, if empty, the union) of their
    children's jobs, and edges joining nodes that share a job are collapsed
    until every node carries exactly one job.
    ``self.pasta_team.subsets`` is replaced by a node-label -> job mapping.

    Subset names are the part of each job's ``tmp_dir_par`` that follows
    ``curr_tmp_dir_par`` and one separator character.

    Returns the spanning tree as a ``PhylogeneticTree``.
    Raises ``Exception`` when two nodes of the heuristic tree end up with
    the same label.
    """
    # uym2 added: add option for MST
    if build_min_tree:
        _LOG.debug("START building Minimum Spanning Tree")
        grouping = {}
        groupName2jobName = {}
        for node in self.tree._tree.leaf_node_iter():
            groupName = self.pasta_team.subsets[node.taxon.label].tmp_dir_par[len(curr_tmp_dir_par)+1:]
            # Group names are handed to the MST builder without slashes.
            grouping[node.taxon.label] = groupName.replace("/","")
            groupName2jobName[groupName] = self.pasta_team.subsets[node.taxon.label]
        subsets_tree = build_groups_MST(self.tree._tree,grouping)
        # Put a "/" back in front of every "d" in the labels.
        # NOTE(review): presumably this restores the slashes removed above - confirm.
        for node in subsets_tree.postorder_node_iter():
            if node.is_leaf():
                node.taxon.label = node.taxon.label.replace("d","/d")
            node.label = node.label.replace("d","/d")
        self.pasta_team.subsets = groupName2jobName
        MST = PhylogeneticTree(subsets_tree)
        _LOG.debug("Spanning tree is:\n %s" %MST)
        return MST
    ###################################
    _LOG.debug("START building heuristic spanning tree")
    translate={}  # leaf label -> subset name
    t2 = {}  # subset name -> one-element set holding the job
    for node in self.tree._tree.leaf_node_iter():
        nalsj = self.pasta_team.subsets[node.taxon.label]
        newname = nalsj.tmp_dir_par[len(curr_tmp_dir_par)+1:]
        translate[node.taxon.label] = newname
        t2[newname] = set([nalsj])
    subsets_tree = PhylogeneticTree(Tree.get(data=self.tree_str,schema='newick'))
    for node in subsets_tree._tree.leaf_node_iter():
        # Leaves of one subset share the same set object.
        node.alignment_subset_job = t2[translate[node.taxon.label]]
        #node.alignment_subset_job = t2[node.taxon]
    del t2
    del translate
    _LOG.debug("leafs labeled")
    #subsets_tree._tree.infer_taxa()
    #_LOG.debug("fake taxa inferred")
    #Then make sure the tree is rooted at a branch (not at a node).
    if len(subsets_tree._tree.seed_node.child_nodes()) > 2:
        # Pick the first root child on an internal edge; if none is found,
        # ``c`` is simply the last child.
        for c in subsets_tree._tree.seed_node.child_nodes():
            if c.edge.is_internal():
                break
        subsets_tree._tree.is_rooted = True
        subsets_tree._tree.reroot_at_edge(c.edge,length1=c.edge.length/2., length2=c.edge.length/2., suppress_unifurcations=False)
    _LOG.debug("Subset Labeling (start):\n%s" %str(subsets_tree.compose_newick(suppress_rooting=False))[0:5000])
    #_LOG.debug("Subset Labeling (start):\n%s" %str(len(subsets_tree._tree.seed_node.child_nodes())))
    # Then label internal branches based on their children, and collapse redundant edges.
    for node in subsets_tree._tree.postorder_internal_node_iter():
        # my label is the intersection of my children,
        # unless the intersection is empty, in which case it is the union
        if not hasattr(node, "alignment_subset_job") or node.alignment_subset_job is None:
            node.alignment_subset_job = set.intersection(*[c.alignment_subset_job for c in node.child_nodes()])
            if not node.alignment_subset_job:
                node.alignment_subset_job = set.union(*[c.alignment_subset_job for c in node.child_nodes()])
        # Now go ahead and prune any child whose label encompasses my label.
        # Use indexing instead of iteration, because with each collapse,
        # new children can be added, and we want to process them as well.
        i = 0;
        while i < len(node.child_nodes()):
            c = node.child_nodes()[i]
            if node.alignment_subset_job.issubset(c.alignment_subset_job):
                # Dendropy does not collapsing and edge that leads to a tip. Remove instead
                if c.child_nodes():
                    c.edge.collapse()
                else:
                    node.remove_child(c)
            else:
                i += 1
        node.label = "+".join(nj.tmp_dir_par[len(curr_tmp_dir_par)+1:] for nj in node.alignment_subset_job)
        # An internal node can have lost all its children above.
        if node.is_leaf():
            node.taxon = subsets_tree._tree.taxon_namespace.new_taxon(label=node.label)
    _LOG.debug("Before final round, the tree is:\n %s" %str(subsets_tree.compose_newick(suppress_rooting=False))[0:5000])
    # Now, the remaining edges have multiple labels. These need to
    # be further resolved. Do it by minimum length
    # First find all candidate edges that we might want to contract
    candidate_edges = set()
    for e in subsets_tree._tree.postorder_edge_iter():
        if e.tail_node and e.head_node.alignment_subset_job.intersection(e.tail_node.alignment_subset_job):
            candidate_edges.add( (e.length,e) )
    # Then sort the edges, and start removing them one by one
    # only if an edge is still having intersecting labels at the two ends
    # (edges with no length, or length 0, sort first via the -1 key)
    candidate_edges = sorted(candidate_edges, key=lambda x: x[0] if x[0] else -1)
    for (el, edge) in candidate_edges:
        I = edge.tail_node.alignment_subset_job.intersection(edge.head_node.alignment_subset_job)
        if I:
            edge.tail_node.alignment_subset_job = I
            if edge.head_node.child_nodes():
                #edge.collapse(adjust_collapsed_head_children_edge_lengths=True)
                edge.collapse()
            else:
                edge.tail_node.remove_child(edge.head_node)
    # Make sure the tree is correct, remove the actual jobs
    # from nodes (can cause deep-copy problems), assign a label to each
    # node, and keep a mapping between the labels and actual alignment job objects
    self.pasta_team.subsets = {} # Let this now map from subset labels to the actual alignment jobs
    for node in subsets_tree._tree.postorder_node_iter():
        assert len(node.alignment_subset_job) == 1
        nalsj = node.alignment_subset_job.pop()
        node.alignment_subset_job = None
        node.label = nalsj.tmp_dir_par[len(curr_tmp_dir_par)+1:]#only find last part of the name
        self.pasta_team.subsets[node.label] = nalsj
        if node.is_leaf():
            # Add a dummy taxon, or else dendropy can get confused
            node.taxon = subsets_tree._tree.taxon_namespace.new_taxon(label=node.label)
    #subsets_tree._tree.infer_taxa()
    _LOG.debug("Spanning tree is:\n %s" %subsets_tree)
    # Every node must have a distinct label.
    labels = [nd.label for nd in subsets_tree._tree.postorder_node_iter()]
    if len(set(labels)) != len(labels):
        import collections
        raise Exception("Duplicate names found %s" %"\n".join
                        (item for item, count in collections.Counter(labels).items() if count > 1))
    return subsets_tree
from dendropy import Tree

# Labels of the clades to look for, mapped to a small integer index.
label_nodes = {'Other':0, 'Chloroplastida_ott361838':1, 'Metazoa_ott691846':2, 'Fungi_ott352914':3, 'Bacteria_ott844192':4}
# Filled in below: label -> the internal tree node carrying that label.
target_nodes = {}
# index -> label with the trailing "_ott<digits>" identifier stripped.
# Raw string: "\d" in a plain string literal is an invalid escape sequence.
names = {index: re.sub(r"_ott\d+", "", k) for k, index in label_nodes.items()}

parser = argparse.ArgumentParser(description='Count the number of unnamed nodes in a tree')
parser.add_argument('treefile', type=argparse.FileType('r'), help='A newick-format tree')
args = parser.parse_args()


def warn(*objs):
    """Print the given objects to stderr."""
    print(*objs, file=sys.stderr)


tree = Tree.get(file=args.treefile, schema='newick', preserve_underscores=True, suppress_leaf_node_taxa=True)

# Annotate every node with `n_leaves`, the number of leaves at or below it.
# Post-order guarantees all children of a node have contributed to its count
# before the node itself is visited.
for node in tree.postorder_node_iter():
    if node.is_leaf():
        node.n_leaves = 1
    elif node.label in label_nodes:
        # Only internal nodes are recorded as target clades.
        target_nodes[node.label] = node
    parent = node._parent_node
    if parent is not None:  # the root has no parent to propagate to
        # The first child to reach a parent initialises its counter.
        parent.n_leaves = getattr(parent, 'n_leaves', 0) + node.n_leaves
lines_list = fil.readlines() fil.close() test = SeqDic(lines_list) # If this dose not worke we do not have a FASTA file ## # Makes a newick tree and checks if the referense tree is recovered. The none reducing file. ## line = ( "cat /home/4/u1we1f44/Documents/appbio15/project/data/" + path + ' | fastprot -I fasta -O phylip | fnj -I phylip -O "newick" -o "Treeout.txt"' ) os.system(line) TreePath = file_path + "/" + RefTree t1 = Tree.get( file=open("/home/4/u1we1f44/Documents/appbio15/project/data/" + TreePath, "r"), schema="newick", tree_offset=0, ) t2 = Tree.get( file=open("/home/4/u1we1f44/Documents/appbio15/project/src/Treeout.txt", "r"), schema="newick", tree_offset=0, taxon_namespace=t1.taxon_namespace, ) t1.encode_bipartitions() t2.encode_bipartitions() if treecompare.symmetric_difference(t1, t2) == 0: NotFixedCount += 1 os.remove("/home/4/u1we1f44/Documents/appbio15/project/src/Treeout.txt") Total += 1 else:
This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. """ import argparse from dendropy import Tree if __name__ == '__main__': parser = argparse.ArgumentParser(description='Rescale tree height') parser.add_argument('--max-height', type=float, metavar='FLOAT', default=0.1, help='Scale longest branch to max height [0.1]') parser.add_argument('--if', dest='input_format', default='newick', choices=['newick', 'nexus', 'nexml'], help='Input tree format [newick]') parser.add_argument('--of', dest='output_format', default='newick', choices=['newick', 'nexus', 'nexml'], help='output tree format [newick]') parser.add_argument('input', type=argparse.FileType('r'), default='-', help='Input tree') parser.add_argument('output', type=argparse.FileType('w'), default='-', nargs='?', help='Output tree [stdout]') args = parser.parse_args() tr = Tree.get(file=args.input, schema=args.input_format) tr.scale_edges(args.max_height / max(tr.calc_node_root_distances())) tr.write_to_stream(args.output, args.output_format)
def sum_popularity_over_tree(tree, OTT_ptrs=None, exclude=(), pop_store='pop', verbosity=0):
    """Annotate a phylogenetic tree with popularity sums and node counts, and return the tree.

    'tree' can be a dendropy Tree object, or a file object containing a newick tree (it is
    handed to Tree.get(file=...)).

    We might want to exclude some names from the popularity metric (e.g. exclude archosaurs,
    to make sure birds don't gather popularity intended for dinosaurs). This is done by passing
    a collection of node labels such as ['Dinosauria_ott90215', 'Archosauria_ott335588'] as the
    'exclude' argument: nodes with those labels are given a popularity of 0.

    If OTT_ptrs is given (and non-empty), the popularity of a node is read from
    OTT_ptrs[OTTid]['wd']['final_wiki_item']['pop'] (converted to float), where OTTid is the
    integer following the last "_ott" in the node label. Otherwise it is read from
    node.data['wd']['final_wiki_item']['pop']. Nodes for which this lookup fails get a
    popularity of 0 and has_pop = False.

    NOTE: 'pop_store' is accepted for interface compatibility but is not consulted by this
    implementation: the popularity is always stored in the node attribute literally named
    `pop_store`.

    The following attributes are set on every node:
        pop_store           popularity of the node itself
        has_pop             True if a popularity value was found for the node
        n_descendants       number of nodes (internal and leaf) below this node
        descendants_popsum  popularity summed over all descendants (excluding the node itself)
        n_ancestors         number of edges between this node and the root
        n_pop_ancestors     number of nodes with has_pop set on the path from the root to
                            this node, including the node itself but not the root
        ancestors_popsum    popularity summed along that same path (this includes the
                            popularity of the node itself, but not that of the root)
        seedplant           True for the node labelled 'Spermatophyta' and, by inheritance,
                            everything below it

    If OTT_ptrs is given, the same values are copied into OTT_ptrs[OTTid] under the keys
    'pop_self', 'pop_ancst', 'pop_dscdt', 'n_ancst', 'n_dscdt', 'n_pop_ancst' and
    'is_seed_plant'. To get a measure of the sum of both ancestor and descendant popularity,
    just add 'pop_ancst' and 'pop_dscdt' together.

    All accumulators are reset at the start of each call, so calling this function repeatedly
    on the same Tree object (e.g. with different 'exclude' lists) does not double count.
    """
    from dendropy import Tree

    def ott_id(label):
        # 'Name_ott123' -> 123. Raises AttributeError for a None label, IndexError when
        # the label has no "_ott" marker, and ValueError when the suffix is not an integer.
        return int(label.rsplit("_ott", 1)[1])

    if not isinstance(tree, Tree):
        tree = Tree.get(file=tree, schema='newick', suppress_edge_lengths=True, preserve_underscores=True, suppress_leaf_node_taxa=True)
    if verbosity:
        print(" Tree read for phylogenetic popularity calc: mem usage {:.1f} Mb".format(memory_usage_resource()), file=sys.stderr)

    # Put the popularity into the pop_store attribute, and reset every accumulator so that
    # values left behind by an earlier call on the same tree cannot leak into this one.
    for node in tree.preorder_node_iter():
        node.n_descendants = 0
        node.descendants_popsum = 0
        node.pop_store = 0
        node.has_pop = False
        if node.label in exclude:
            continue
        try:
            if OTT_ptrs:
                popularity = float(OTT_ptrs[ott_id(node.label)]['wd']['final_wiki_item']['pop'])
            else:
                popularity = node.data['wd']['final_wiki_item']['pop']
        except (LookupError, AttributeError, ValueError):
            # unlabelled node, no OTT id in the label, or no popularity recorded
            continue
        node.pop_store = popularity
        node.has_pop = True

    # Go up the tree from the tips, summing up the popularity indices beneath and adding the
    # number of descendants. Post-order means a node's own totals are complete before they
    # are pushed to its parent.
    for node in tree.postorder_node_iter():
        parent = node.parent_node
        if parent is not None:  # the root has nothing above it
            parent.n_descendants += 1 + node.n_descendants
            parent.descendants_popsum += node.pop_store + node.descendants_popsum

    # Go down the tree from the root, summing up the popularity indices above, and summing up
    # numbers of nodes. Pre-order means the parent's values are already in place.
    for node in tree.preorder_node_iter():
        parent = node.parent_node
        if parent is None:
            # this is the root
            node.seedplant = False
            node.n_ancestors = 0
            node.n_pop_ancestors = 0
            node.ancestors_popsum = 0.0
            continue
        node.n_ancestors = parent.n_ancestors + 1
        node.ancestors_popsum = parent.ancestors_popsum + node.pop_store
        node.n_pop_ancestors = parent.n_pop_ancestors + (1 if node.has_pop else 0)
        # NOTE(review): this compares against the bare name, so a label carrying an OTT
        # suffix (e.g. 'Spermatophyta_ott1007992') will not match - confirm intent.
        if node.label and node.label == 'Spermatophyta':
            node.seedplant = True
            print("Found plant root", file=sys.stderr)
        else:
            node.seedplant = parent.seedplant

    # Place these values into the OTT_ptrs structure
    if OTT_ptrs:
        for node in tree.preorder_node_iter():
            try:
                entry = OTT_ptrs[ott_id(node.label)]
            except (LookupError, AttributeError, ValueError):
                # No usable OTT id on this node, or the id is not present in OTT_ptrs.
                # ValueError is caught here for consistency with the lookup above.
                continue
            entry['pop_self'] = node.pop_store
            entry['pop_ancst'] = node.ancestors_popsum  # nb, this includes popularity of self
            entry['pop_dscdt'] = node.descendants_popsum
            entry['n_ancst'] = node.n_ancestors
            entry['n_dscdt'] = node.n_descendants
            entry['n_pop_ancst'] = node.n_pop_ancestors
            entry['is_seed_plant'] = node.seedplant
    return tree
context_name = "All life" with open(f,'r', encoding='utf-8') as treefile: treestr = treefile.read() treestart = treestr.find(']') if treestart == -1: treestart = 0 treestart = treestr.find('(',treestart) if treestart == -1: print("No tree in file {}".format(f), file=sys.stderr) continue startstr = treestr[:treestart] m = context_re.search(startstr) if m: context_name = m.group(1) try: tree = Tree.get(data=treestr[treestart:], schema="newick", suppress_leaf_node_taxa=True, terminating_semicolon_required=False, preserve_underscores=True, rooting='default-rooted') except: print("WARNING: error reading tree '{}'".format(f)) raise #check for polytomies for nd in tree.postorder_internal_node_iter(): if len(nd._child_nodes) != 2: print("WARNING: in {} there is a branch ({}) with {} child nodes: this will be removed by OneZoom".format(f, nd.label or "<unnamed>", len(nd._child_nodes)), file=sys.stderr) #These are cases where v5 of the OpenTree incorrectly gives them the same number as another species OTT_wrong_synonyms =['Geochelone_nigra_ephippium', 'Geochelone_nigra_guntheri','Geochelone_nigra_vandenburghi', 'Geochelone_nigra_microphyes', 'Pachyptila_crassirostris', 'Ducula_spilorrhoa','Ducula_luctuosa', 'Ducula_subflavescens', 'Lophura_hoogerwerfi', 'Acomys_airensis', 'Alouatta_nigerrima', 'Myotis_occultus'] #these are cases where OneZoom probably has an incorrect species (OpenTree has them as a synonym of something else) but I can't be bothered to correct the OZ tree OZ_spurious_spp = ['Cyclemys_orbiculata','Cyclemys_ovata'] ignore = OTT_wrong_synonyms + OZ_spurious_spp if args.leavesonly: