Exemple #1
0
 def __init__(self, **kwargs):
     '''
     Load the tree to be re-rooted and, optionally, a reference tree.

     Parameters
     ----------
     reference_tree_path: str
         Path to the file containing the reference tree, which is used to
         reroot the tree provided via tree_path. Optional: when it is
         omitted (or falsy) no file is read and self.reference_tree simply
         holds the value that was passed (None by default).
     tree_path: str
         Path to the file containing the tree to be re-rooted. This tree will
         be rerooted at the same position as the tree provided via
         reference_tree_path.

     Raises
     ------
     KeyError
         If tree_path is not supplied.
     Exception
         If any keyword argument other than the two above is supplied.
     '''
     reference_tree_path = kwargs.pop('reference_tree_path', None)
     tree_path = kwargs.pop('tree_path')

     # Reject unknown arguments before any file I/O, so a misspelled keyword
     # is reported immediately instead of after the trees have been parsed.
     if kwargs:
         raise Exception("Unexpected arguments provided to Decorator class: %s" % kwargs)

     # Pass the path as a logging argument so formatting is deferred until
     # the record is actually emitted.
     logging.debug("Importing old tree from file: %s", tree_path)
     self.tree = Tree.get(path=tree_path,
                          schema='newick')
     if reference_tree_path:
         logging.debug("Importing reference tree from file: %s",
                       reference_tree_path)
         self.reference_tree = Tree.get(path=reference_tree_path,
                                        schema='newick')
     else:
         self.reference_tree = reference_tree_path
Exemple #2
0
def scale_tree(tree, x):
    """Return two rescaled copies of *tree*.

    The first copy (``s_tree``) is re-parsed from *tree* in the same taxon
    namespace; every non-root edge whose bipartition matches an active,
    non-root node of *tree* is multiplied by ``x[node.idx]``. The second copy
    (``t_tree``) is re-parsed from ``s_tree`` and has every non-root edge
    divided by ``mu = x[-2]``.

    Side effects on the input: ``tree.is_rooted`` is set to True and its
    bipartitions are (re-)encoded.
    """
    namespace = tree.taxon_namespace
    s_tree = Tree.get(
        data=tree.as_string("newick"),
        taxon_namespace=namespace,
        schema="newick",
        rooting="force-rooted",
    )

    tree.is_rooted = True
    for rooted in (tree, s_tree):
        rooted.encode_bipartitions()

    mu = x[-2]

    # bipartition -> index into x, taken from the active non-root nodes of the input
    mapping = {
        src.bipartition: src.idx
        for src in tree.postorder_node_iter()
        if src is not tree.seed_node and src.is_active
    }

    for scaled in s_tree.postorder_node_iter():
        if scaled is s_tree.seed_node or scaled.bipartition not in mapping:
            continue
        scaled.edge_length *= x[mapping[scaled.bipartition]]

    # t_tree is parsed without the shared taxon namespace.
    t_tree = Tree.get(
        data=s_tree.as_string("newick"),
        schema="newick",
        rooting="force-rooted",
    )

    for timed in t_tree.postorder_node_iter():
        if timed is t_tree.seed_node:
            continue
        timed.edge_length /= mu

    return s_tree, t_tree
Exemple #3
0
def addSiblingsFromLabels(concat_tree_file, species_tree_file, split_string):
    """Expand species-tree leaves using matching leaves of the concatenated tree.

    Both trees are read as Newick with underscores preserved. For every leaf of
    the species tree, ``find_similar`` is asked for the concatenated-tree
    leaves matching the leaf's taxon label:

    * exactly one match: the species leaf label gets ``split_string`` plus the
      second ``split_string``-separated field of the matching label appended;
    * more than one match: the matching nodes are attached to the species
      leaf's parent and the species leaf itself is removed;
    * no match: the leaf is left untouched.

    The resulting species tree is printed to stdout as Newick; nothing is
    returned.
    """
    concat_tree = Tree.get(path=concat_tree_file, schema="newick", preserve_underscores=True)
    species_tree = Tree.get(path=species_tree_file, schema="newick", preserve_underscores=True)

    concat_tree_leaves = list(concat_tree.leaf_nodes())

    for leaf_node in species_tree.leaf_nodes():
        species_label = leaf_node.taxon.label
        similar_nodes = find_similar(concat_tree_leaves, species_label)
        # Compare by value: `is 1` tests object identity and only works by
        # accident of CPython's small-integer cache.
        if len(similar_nodes) == 1:
            string_to_add = split_string + similar_nodes[0].taxon.label.split(split_string)[1]
            leaf_node.taxon.label = leaf_node.taxon.label + string_to_add
        elif len(similar_nodes) > 1:
            parent = leaf_node._parent_node
            for new_node in similar_nodes:
                parent.add_child(new_node)
            parent.remove_child(leaf_node)

    print(species_tree.as_string(schema="newick", suppress_internal_node_labels=False))
Exemple #4
0
    def test_joel_bug(self):
        """Reroot tree70 by the reference tree67 and check the resulting root split.

        Both trees are given inline as Newick text. After
        ``Rerooter().reroot_by_tree(old_tree, tree_to_reroot)`` every tip under
        one root child of the reference tree must appear under the opposite
        root child of the rerooted tree, and the number of leaves must be
        unchanged by rerooting.
        """
        # Reference tree; the leading [...] block is carried inside the Newick text.
        tree67 = u'''[
Thu Sep 10 15:55:28 2015: Loaded from /srv/projects/graftm/testing_files/testing_graftM/tmp_01_decorate/67_otus.tree
Thu Sep 10 15:56:18 2015: tree_67_otus saved to /srv/projects/graftm/testing_files/testing_graftM/tmp_01_decorate/67_otus.rerooted.tree
]
((((1928988:0.10866,2909029:0.15809):0.03546,((801940:0.10703,(3825327:0.12686,4298210:0.09398):0.07480):0.02560,729293:0.21465):0.01982):0.02058,((426860:0.16275,219508:0.12556):0.02403,((1128285:0.06200,4455990:0.07954):0.07525,(815912:0.12348,(3770699:0.23707,823009:0.09955):0.04225):0.01489):0.01849):0.01531):0.09184,(((2361381:0.22741,(3779572:0.06720,4363260:0.07438):0.01460):0.04187,(((((((734152:0.13251,4091454:0.12251):0.03552,((576962:0.14097,(1145804:0.14124,3106714:0.14895):0.01964):0.01668,(2014493:0.15560,(3192744:0.11018,(202294:0.07263,1138804:0.08032):0.05015):0.01277):0.01187):0.01016):0.01486,4323734:0.15004):0.00053,(759363:0.05430,4459468:0.04835):0.03216):0.01531,4322265:0.12041):0.01024,(4391683:0.11058,(229854:0.07735,(4336814:0.09937,((150571:0.07911,2730777:0.10930):0.04404,((4042859:0.25381,(717487:0.13914,4363563:0.19585):0.02281):0.02587,(((3190878:0.16480,4452949:0.07312):0.05029,(4015030:0.10339,(4438491:0.04779,(2286116:0.08699,(4251079:0.03657,4349225:0.02256):0.01189):0.01091):0.04963):0.01748):0.02917,(3014179:0.16455,(2170497:0.16101,(2107103:0.22406,951205:0.11633):0.02436):0.02574):0.03041):0.01561):0.02862):0.02589):0.01914):0.01811):0.01347):0.01451,((182569:0.14758,4363259:0.07793):0.04894,696036:0.14901):0.01514):0.01624):0.02659,(3761685:0.11278,4423155:0.16503):0.03965):0.09184);
'''
        # Tree to be rerooted.
        # NOTE(review): in this copy the single-quoted literal below is wrapped
        # across two physical lines, which is not valid Python - confirm
        # against the upstream file.
        tree70 = u'((4423550:0.17275,((4091454:0.108,4427993:0.1045)50:0.01575,((123662:0.06599,(3269889:0.12737,(104534:0.06041,734152:0.09136)20:0.00526)80:0.01669)90:0.01398,(300695:0.10755,225636:0.1317)100:0.0405)0:0.01073)40:0.0128)20:0.00782,(4377103:0.09243,((172946:0.08097,1145804:0.08645)100:0.02986,(1941303:0.0953,4332975:0.09505)90:0.00838)100:0.02206)90:0.0272,((((1931714:0.07012,(4322265:0.10071,4343117:0.13235)100:0.01842)100:0.03116,(((759363:0.05402,4459468:0.0433)100:0.02405,(294612:0.14484,2679839:0.1009)90:0.02132)70:0.01331,((((((730039:0.15444,((4015030:0.11176,(4438491:0.04568,(4349225:0.02406,(2286116:0.08501,(4251079:0.02026,4386156:0.01582)80:0.01016)40:0.0097)80:0.0168)100:0.03826)50:0.01397,(4308961:0.10766,4452949:0.05355)90:0.06215)40:0.01455)50:0.01325,(((1718272:0.12738,(150571:0.08502,(699249:0.03117,2730777:0.03253)100:0.06302)70:0.02174)60:0.03847,(((2107103:0.20025,3190878:0.14435)40:0.03601,(1824285:0.10892,3014179:0.14706)30:0.02039)0:0.01309,((3366304:0.09202,951205:0.07509)100:0.05732,2170497:0.16332)90:0.02722)10:0.01937)0:0.01868,(3064426:0.20791,((1837676:0.14477,(4363563:0.14803,4479774:0.10823)90:0.04638)90:0.03766,(4042859:0.2295,717487:0.15674)40:0.01749)20:0.01416)0:0.01063)0:0.03387)100:0.04795,4336814:0.08037)0:0.02958,(346735:0.11193,4391683:0.07639)60:0.00894)0:0.01312,1142178:0.07594)0:0.01881,(229854:0.0646,4460175:0.09289)90:0.02422)20:0.01731)0:0.01339)0:0.00777,(((2984017:0.05634,4340384:0.07722)80:0.03016,(((4371218:0.13005,(1133483:0.08797,3106714:0.09717)90:0.02053)80:0.02174,(3256066:0.08328,4022282:0.11841)90:0.03619)100:0.03392,((202294:0.06795,1138804:0.07777)100:0.05296,(3192744:0.09608,(2014493:0.11684,(180127:0.06532,4417185:0.0713)100:0.03824)100:0.0368)40:0.01663)70:0.00787)50:0.01733)10:0.0083,(222095:0.1391,(288404:0.13004,(4323734:0.07601,4446882:0.06844)60:0.01661)100:0.02863)40:0.01639)0:0.00846)0:0.0135,(((((1133369:0.07769,4336154:0.07979)100:0.11778,(((708774:0.0822,((114724:0.047,82092:0.0
4936)100:0.11526,(201206:0.10329,4423155:0.14181)60:0.03138)40:0.01886)80:0.03209,(202302:0.11673,3761685:0.09059)100:0.02325)90:0.02946,(((576962:0.11188,202459:0.09918)90:0.033,(213358:0.0989,(3390949:0.09853,3726184:0.09836)90:0.03298)90:0.02315)20:0.01425,202949:0.15903)0:0.01188)20:0.02709)10:0.01609,((4323100:0.0982,4409929:0.10612)60:0.01386,((696036:0.11283,(203529:0.18615,202449:0.08377)10:0.02209)30:0.02916,((2361381:0.18808,203220:0.10905)100:0.04166,(4363260:0.07208,(3779572:0.04977,114015:0.13268)70:0.02151)70:0.01055)100:0.04229)0:0.01717)0:0.01634)0:0.00519,(539547:0.12233,(4409453:0.14784,(4363259:0.05689,((268769:0.0594,266521:0.05311)100:0.04977,(182569:0.10314,4463866:0.07165)70:0.01505)100:0.04024)80:0.01602)100:0.05088)20:0.02162)0:0.0112,((573196:0.11279,((((3825327:0.11767,4298210:0.09472)100:0.07495,(836195:0.11165,801940:0.09002)100:0.02232)90:0.0347,((1928988:0.1129,(1129716:0.13293,2909029:0.13959)50:0.01858)70:0.02572,(((815912:0.12176,((219508:0.13512,(426860:0.12643,(202758:0.04748,4344033:0.03692)100:0.11429)90:0.0487)20:0.00791,((823117:0.10669,823009:0.0888)90:0.0381,3770699:0.24911)50:0.02136)40:0.02309)30:0.01326,(4455990:0.05381,(1128285:0.06585,4271527:0.03794)70:0.02727)100:0.06911)10:0.01546,4097115:0.09311)30:0.02142)20:0.01039)20:0.02855,(729293:0.18117,3871866:0.11553)90:0.03599)100:0.15854)20:0.02836,150700:0.13922)20:0.02787)0:0.00717)0:0.00859)100;'

        old_tree = Tree.get(schema='newick', data=tree67)
        tree_to_reroot = Tree.get(schema='newick', data=tree70)
        new_tree = Rerooter().reroot_by_tree(old_tree, tree_to_reroot)

        # The two sides of the reference root are compared crosswise: tips under
        # reference child 0 are looked up under rerooted child 1, and vice versa.
        expected_lefts = old_tree.seed_node.child_nodes()[0].leaf_nodes()
        expected_rights = old_tree.seed_node.child_nodes()[1].leaf_nodes()
        for tip in expected_lefts:
            self.assertTrue(tip.taxon.label in [
                t.taxon.label
                for t in new_tree.seed_node.child_nodes()[1].leaf_nodes()
            ])
        for tip in expected_rights:
            self.assertTrue(tip.taxon.label in [
                t.taxon.label
                for t in new_tree.seed_node.child_nodes()[0].leaf_nodes()
            ])
        # The rerooted tree must have as many leaves as the tree passed in.
        self.assertEqual(len(tree_to_reroot.leaf_nodes()),
                         len(new_tree.leaf_nodes()))
Exemple #5
0
    def test_ben_bug(self):
        """Reroot one inline Newick tree by another and check the resulting root split.

        Both trees are first passed through ``Rerooter.reroot`` and the results
        are handed to ``Rerooter.reroot_by_tree``. Every tip under one root
        child of ``old_tree`` must then appear under the opposite root child of
        the rerooted tree, and the leaf count must match ``tree_to_reroot``.
        """
        # NOTE(review): in this copy both single-quoted literals below are
        # wrapped across two physical lines, which is not valid Python -
        # confirm against the upstream file.
        new_tree_newick = u'(646366661:0.00571,(646777089:0.01427,(2556226606:0.0,2517129521:0.0):0.04312)0.377:0.01170,((650856936:0.01153,(((((646367708:0.01465,(638201361:0.00187,646622935:0.00573)0.940:0.01352)0.988:0.02634,(2519841469:0.06952,(650856136:0.01840,2506713669:0.02486)0.774:0.00888)0.893:0.01193)0.981:0.03778,((649738338:0.07504,(638155665:0.00613,648151945:0.00304)0.995:0.05973)0.884:0.02836,((650752390:0.11644,(2516847065:0.01707,2520801411:0.03442)0.993:0.04619)0.940:0.03278,(640592705:0.14347,637846211:0.11851)0.971:0.04593)0.940:0.03067)0.943:0.03401)0.998:0.06483,(638168675:0.17080,((649738388:0.09935,((((2540854716:0.00325,2553937573:0.00406)1.000:0.09930,(646533023:0.09868,640592823:0.06908)0.951:0.03770)1.000:0.07636,(650872422:0.05527,(650750471:0.05106,(2516847513:0.01440,2520803234:0.02517)0.998:0.05067)0.947:0.03589)0.074:0.01784)0.786:0.03804,(638155700:0.00445,648151981:0.00284)0.894:0.02131)0.995:0.08448)0.999:0.10690,((KYC55281.1:0.28954,(2540666849:0.26647,(2555938320:0.04589,2518907621:0.04631)0.970:0.05624)1.000:0.12340)0.993:0.09723,(((2515321874:0.26529,((637699780:0.01317,(2540563143:0.01361,(638165755:0.01099,638179449:0.01674)0.558:0.00611)0.964:0.01965)1.000:0.10518,(2502870849:0.06989,(648055573:0.14431,(646706666:0.11338,(637960147:0.03570,(2509663319:0.04930,(2519472088:0.03452,2515107634:0.07709)0.639:0.02134)0.957:0.03004)0.316:0.01755)0.809:0.02055)0.323:0.01213)0.991:0.07286)0.974:0.06071)0.997:0.09000,(650797088:0.06590,(639699575:0.03533,2512008957:0.12951)0.779:0.03900)1.000:0.21062)0.685:0.04985,((((640867801:0.08102,(2507462304:0.07476,(643570914:0.08474,((637897753:0.11959,((2509037835:0.14386,(648194984:0.08665,(648195418:0.04239,2506476786:0.04237)0.993:0.03477)0.668:0.01876)0.510:0.00983,((((640115295:0.06428,2540643958:0.01655)1.000:0.05043,2540643737:0.02645)0.502:0.01269,640115052:0.02482)0.987:0.04272,(2507147269:0.04181,2507146024:0.06962)0.615:0.01449)0.611:0.01456)0.992:0.03807)0.542:0.02193,((2525334
810:0.02116,640099739:0.01544)0.785:0.00549,(640100248:0.00446,2525335778:0.02444)0.750:0.00227)1.000:0.12583)0.944:0.03489)0.962:0.04171)0.986:0.04499)0.793:0.02738,2509039570:0.05560)1.000:0.18840,(2505968448:0.05750,(2505971857:0.03133,2512783668:0.02305)0.185:0.01848)0.998:0.08344)0.875:0.04885,2518787893:0.16350)0.868:0.04436)1.000:0.14016)0.957:0.05998)0.998:0.08120)0.984:0.05479)0.999:0.07664,(2506713165:0.01408,((650917784:0.03595,640788680:0.07226)0.510:0.02178,(2519842728:0.03972,(646859549:0.04217,(2511672461:0.01672,(640786544:0.03901,(640793336:0.00334,(640165512:0.02037,641283602:0.00189)0.147:0.00210)0.175:0.00497)0.641:0.01093)0.991:0.02914)1.000:0.05940)0.323:0.02509)0.977:0.02843)0.960:0.02127)0.499:0.01743)0.998:0.03986,(638202197:0.00190,(644970377:0.01516,646623830:0.00752)0.678:0.00364)0.903:0.00939)0.626:0.01605);'
        old_tree_newick = u'(((((646366661:0.00564,((2517129521:0,2556226606:0):0.04302,646777089:0.01412)0.499:0.01173)0.999:0.07494,(638202197:0.0019,(644970377:0.01507,646623830:0.0075)0.738:0.00362)0.872:0.00931)0.635:0.01598,650856936:0.01308)0.995:0.04,(2506713165:0.01171,((640788680:0.07255,650917784:0.03571)0.466:0.02189,(2519842728:0.03945,(646859549:0.04217,(2511672461:0.01668,(640786544:0.03894,(640793336:0.00335,(640165512:0.02038,641283602:0.0019)0.155:0.00211)0.174:0.00496)0.668:0.01095)0.987:0.02908)1.000:0.05973)0.285:0.02499)0.985:0.02844)0.967:0.02183)0.668:0.01914,(((646367708:0.01473,(638201361:0.00188,646622935:0.00578)0.947:0.01381)0.981:0.02381,(2519841469:0.06809,(650856136:0.0182,2506713669:0.02508)0.727:0.0089)0.915:0.01182)0.971:0.03729,((649738338:0.07412,(648151945:0.00339,638155665:0.00581)0.999:0.05834)0.847:0.02714,((650752390:0.11368,(2516847065:0.01707,2520801411:0.03449)0.997:0.04731)0.943:0.033,(640592705:0.14071,637846211:0.11886)0.974:0.04714)0.907:0.02899)0.938:0.03341)0.999:0.06584,(638168675:0.16751,((649738388:0.09339,((((2540854716:0.00327,2553937573:0.00411)1.000:0.1001,(640592823:0.06966,646533023:0.10017)0.945:0.03786)0.999:0.07732,(650872422:0.0565,(650750471:0.05131,(2516847513:0.01447,2520803234:0.02528)0.998:0.05086)0.932:0.03598)0.014:0.01864)0.823:0.03947,(648151981:0.00285,638155700:0.0045)0.891:0.01979)0.998:0.08667)0.999:0.11839,((2540666849:0.25749,(2518907621:0.04615,2555938320:0.04332)0.979:0.0624)1.000:0.20869,(((2515321874:0.27886,((637699780:0.01253,(2540563143:0.01408,(638165755:0.01119,638179449:0.01933)0.559:0.0062)0.973:0.01906)1.000:0.10181,(((2509663319:0.05476,(2515107634:0.07727,2519472088:0.03736)0.549:0.02018)0.970:0.02691,(637960147:0.03454,646706666:0.12022)0.328:0.01393)0.935:0.02456,(2502870849:0.07124,648055573:0.13944)0.419:0.01578)0.997:0.07265)0.976:0.05902)0.992:0.09155,(650797088:0.06773,(639699575:0.03844,2512008957:0.12921)0.700:0.03684)1.000:0.19382)0.774:0.0559,((2518787893:0.1617,(
2512783668:0.01562,(2505971857:0.02991,2505968448:0.06931)0.820:0.01544)1.000:0.10132)0.000:0.03917,((640867801:0.07687,(2507462304:0.07769,(((637897753:0.11989,((2509037835:0.14075,(648194984:0.08661,(648195418:0.04254,2506476786:0.04232)0.986:0.0348)0.722:0.0191)0.553:0.00998,((((640115295:0.06424,2540643958:0.01643)1.000:0.05042,2540643737:0.02653)0.542:0.01245,640115052:0.0251)0.986:0.04265,(2507146024:0.06963,2507147269:0.0417)0.641:0.01435)0.611:0.01449)0.989:0.03824)0.424:0.02187,((640099739:0.01547,2525334810:0.02122)0.833:0.00545,(640100248:0.00445,2525335778:0.02442)0.761:0.0023)1.000:0.12528)0.944:0.03961,643570914:0.0938)0.959:0.03885)0.984:0.04627)0.758:0.02972,2509039570:0.05614)1.000:0.19872)0.185:0.04422)0.999:0.13795)0.646:0.06267)0.977:0.07517)0.979:0.05715);'
        old_tree = Tree.get(schema='newick', data=old_tree_newick)
        tree_to_reroot = Tree.get(schema='newick', data=new_tree_newick)
        r = Rerooter()
        # NOTE(review): reann is never used again in this test.
        reann = Reannotator()

        # Both inputs go through r.reroot() before being compared by reroot_by_tree.
        new_tree = r.reroot_by_tree(r.reroot(old_tree),
                                    r.reroot(tree_to_reroot))

        # The two sides of the old root are compared crosswise: tips under old
        # child 0 are looked up under rerooted child 1, and vice versa.
        expected_lefts = old_tree.seed_node.child_nodes()[0].leaf_nodes()
        expected_rights = old_tree.seed_node.child_nodes()[1].leaf_nodes()
        for tip in expected_lefts:
            self.assertTrue(tip.taxon.label in [
                t.taxon.label
                for t in new_tree.seed_node.child_nodes()[1].leaf_nodes()
            ])
        for tip in expected_rights:
            self.assertTrue(tip.taxon.label in [
                t.taxon.label
                for t in new_tree.seed_node.child_nodes()[0].leaf_nodes()
            ])
        # The rerooted tree must have as many leaves as the tree passed in.
        self.assertEqual(len(tree_to_reroot.leaf_nodes()),
                         len(new_tree.leaf_nodes()))
Exemple #6
0
def run_tree_regression(arg, taxa):
    """Load the tree at ``arg.tree`` and return ``regression(tree)``.

    The file is treated as NEXUS when its first line starts with ``#NEXUS``
    (case-insensitive) and as Newick otherwise. The tree is read into a taxon
    namespace built from the ``'id'`` of every entry in ``taxa['taxa']``,
    polytomies are resolved, indexes are set up with ``setup_indexes`` and
    tip dates are initialised from each taxon's ``['attributes']['date']``.

    Parameters
    ----------
    arg : object exposing a ``tree`` attribute holding the tree file path.
    taxa : mapping with a ``'taxa'`` list; each item provides ``'id'`` and
        ``['attributes']['date']``.
    """
    taxon_namespace = TaxonNamespace([taxon['id'] for taxon in taxa['taxa']])
    with open(arg.tree) as fp:
        # readline() yields '' for an empty file, where next(fp) would raise
        # StopIteration; the parser then reports the problem itself.
        first_line = fp.readline()
    tree_format = 'nexus' if first_line.upper().startswith('#NEXUS') else 'newick'

    # Only the schema differs between the two formats.
    tree = Tree.get(
        path=arg.tree,
        schema=tree_format,
        tree_offset=0,
        preserve_underscores=True,
        taxon_namespace=taxon_namespace,
    )
    tree.resolve_polytomies(update_bipartitions=True)
    setup_indexes(tree, False)
    taxa2 = [{'date': taxon['attributes']['date']} for taxon in taxa['taxa']]
    initialize_dates_from_taxa(tree, taxa2)
    return regression(tree)
Exemple #7
0
def generate_ATT_from_files(seqaln,
                            mattype,
                            workdir,
                            treefile,
                            otu_json,
                            ingroup_mrca=None):
    """Build an ATT object without phylesystem.

    Reads the alignment at ``seqaln`` (schema ``mattype``), replaces spaces
    with underscores in every taxon label, reads the Newick tree at
    ``treefile`` into the alignment's taxon namespace and loads the OTU
    dictionary from the JSON file ``otu_json``. Every taxon label in the
    alignment must be a key of that dictionary.

    If ``ingroup_mrca`` is given it is converted to ``int`` and used as the
    ingroup MRCA; otherwise the MRCA is computed with ``get_mrca_ott`` from
    the ``'^ot:ottId'`` entry of every OTU in the dictionary.

    Returns the ``AlignTreeTax`` built from the tree, OTU dictionary and
    alignment.
    """
    aln = DnaCharacterMatrix.get(path=seqaln, schema=mattype)
    for tax in aln.taxon_namespace:
        tax.label = tax.label.replace(" ", "_") #Forcing all spaces to underscore UGH
    # The tree is parsed once; a second identical load added nothing.
    tre = Tree.get(path=treefile,
                   schema="newick",
                   preserve_underscores=True,
                   taxon_namespace=aln.taxon_namespace)
    with open(otu_json) as data_file:
        otu_dict = json.load(data_file)
    for tax in aln:
        assert tax.label in otu_dict
    otu_newick = tre.as_string(schema="newick")
    if ingroup_mrca:
        ott_mrca = int(ingroup_mrca)
    else:
        # .get must be *called*; subscripting the bound method raises TypeError.
        ott_ids = [otu_dict[otu].get('^ot:ottId') for otu in otu_dict]
        ott_mrca = get_mrca_ott(ott_ids)
    return AlignTreeTax(otu_newick, otu_dict, aln, ingroup_mrca=ott_mrca, workdir=workdir)
Exemple #8
0
 def test_hello_world(self):
     '''Reroot a five-tip tree by a four-tip reference and compare with the expected Newick.'''
     rerooter = Rerooter()
     reference = Tree.get(schema='newick',
                          data=u'((A,B):1,(C,D):2);',
                          rooting="force-rooted")
     to_reroot = Tree.get(schema='newick',
                          data=u'((A,B):1,(C,(D,E):2):3);',
                          rooting="force-rooted")
     rerooted = rerooter.reroot_by_tree(reference, to_reroot)
     self.assert_tree_equal_no_labels(u'((C,(D,E):2.0),(A,B):4);', rerooted)
Exemple #9
0
def refRFDistance(t1NexFilePath, t2NexFilePath):
    """Return ``custom_distance`` between the two NEXUS trees at the given paths.

    Both trees are read into one shared ``TaxonNamespace``.
    """
    shared_namespace = TaxonNamespace()
    # Keyword arguments common to both reads.
    read_options = dict(unconstrained_taxa_accumulation_mode=True,
                        schema="nexus",
                        taxon_namespace=shared_namespace)
    first_tree = Tree.get(path=t1NexFilePath, **read_options)
    second_tree = Tree.get(path=t2NexFilePath, **read_options)
    return custom_distance(first_tree, second_tree)
def get_tree_from_synth(ott_ids, label_format="name", citation="cites.txt"):
    """Fetch the pruned synthetic tree for ``ott_ids`` and write out its citations.

    For every entry of the response's ``'supporting_studies'`` the study id
    (the part before ``'@'``) is looked up with ``oti.find_studies`` and the
    first hit's publication reference and publication are appended to the
    citation text, which is written to the file ``citation``. Progress is
    reported on stdout.

    Returns the tree parsed from the response's ``'newick'`` entry, with
    unifurcations suppressed.
    """
    assert label_format in ['id', 'name', 'name_and_id']
    resp = treemachine.get_synth_tree_pruned(ott_ids=ott_ids,
                                             label_format=label_format)
    citation_chunks = []
    sys.stdout.write("gathering citations")
    for study_ref in resp['supporting_studies']:
        sys.stdout.write('.')
        study_id = study_ref.split('@')[0]
        hits = oti.find_studies(query_dict={"ot:studyId": study_id}, verbose=True)
        first_hit = hits[0]
        citation_chunks.append(
            '\n'
            + to_string(first_hit['ot:studyPublicationReference'])
            + first_hit['ot:studyPublication']
        )

    with open(citation, 'w') as citfile:
        citfile.write(''.join(citation_chunks))
    sys.stdout.write("citations printed to {}\n".format(citation))

    synth_tree = Tree.get(data=resp['newick'],
                          schema="newick",
                          suppress_internal_node_taxa=True)
    synth_tree.suppress_unifurcations()
    return synth_tree
Exemple #11
0
 def __init__(self, tree, otu_dict, alignment, ingroup_mrca, workdir, config_obj,
              schema='newick', taxon_namespace=None):
     """Build the combined alignment / tree / OTU-dictionary object.

     Parameters
     ----------
     tree : tree data passed to ``Tree.get(data=...)`` and parsed with ``schema``.
     otu_dict : dict, stored as ``self.otu_dict``.
     alignment : DnaCharacterMatrix, stored as ``self.aln``; the tree is read
         into its taxon namespace.
     ingroup_mrca : value convertible with ``int()``, stored as ``self.mrca_ott``.
     workdir : directory path; made absolute and created if it does not exist.
     config_obj : stored as ``self.config``.
     schema : schema name handed to ``Tree.get`` (default ``'newick'``).
     taxon_namespace : not used anywhere in this method.
     """
     debug("build ATT class")
     self.aln = alignment
     assert isinstance(self.aln, datamodel.charmatrixmodel.DnaCharacterMatrix), \
             ("your aln '%s' is not a DnaCharacterMatrix" % alignment)
     # The tree shares the alignment's taxon namespace; the assert below checks it.
     self.tre = Tree.get(data=tree,
                         schema=schema,
                         preserve_underscores=True,
                         taxon_namespace=self.aln.taxon_namespace)
     assert (self.tre.taxon_namespace is self.aln.taxon_namespace), "tre and aln taxon_namespace are not identical"
     assert isinstance(otu_dict, dict), ("otu_dict '%s' is not of type dict" % otu_dict)
     self.otu_dict = otu_dict
     self.config = config_obj
     self.ps_otu = 1  # iterator for new otu IDs
     # NOTE(review): both reconcile helpers run before workdir, mrca_ott and
     # _reconciled are assigned - confirm they do not read those attributes.
     self._reconcile()
     self._reconcile_names()
     self.workdir = os.path.abspath(workdir)
     if not os.path.exists(self.workdir):
         os.makedirs(self.workdir)
     # NOTE(review): int() raises ValueError/TypeError for non-numeric input
     # rather than AssertionError, and the assertion also fails for 0.
     assert int(ingroup_mrca), ("your ingroup_mrca '%s' is not an integer." % ingroup_mrca)
     self.mrca_ott = ingroup_mrca  # ott_ingroup mrca can be pulled directly from phylesystem
     self.orig_seqlen = []  # will get filled in later...
     self.gb_dict = {}  # has all info about new blast seq
     # NOTE(review): set to False after _reconcile() has already been called above.
     self._reconciled = False
     self.unpubl_otu_json = None
 def test_bootstraps_in_annotated_tree_alongside_empty_taxa(self):
     '''A quoted internal label carrying both a bootstrap and a taxon
     ('0.2:tax') gives 'tax' to the tips below it; the other tips get an
     empty taxonomy.'''
     # assertEqual replaces the deprecated alias assertEquals (removed in Python 3.12).
     self.assertEqual({u'a': [],
                       u'b': [],
                       u'c': ['tax'],
                       u'd': ['tax']},
                      TaxonomyExtractor().taxonomy_from_annotated_tree(
                          Tree.get(data="(a,(b,(c,d:0.2)'0.2:tax')0.01973:0.9)root;", schema='newick')))
    def test_relabel(self):  ## JUST PUT A LIST OF IDS TO SIMPLIFY
        """Check labelling of the induced synthetic tree for the tips of study ot_809/tree1.

        The study tree is fetched with OTT ids as labels; every tip label that
        parses as an integer is collected and passed to
        ``taxonomy_helpers.labelled_induced_synth``, once with
        ``label_format='name'`` (tip count is checked) and once with
        ``'name_and_id'`` (a specific MRCA label must be present).
        """
        jetz = OT.get_tree(study_id='ot_809',
                           tree_id='tree1',
                           tree_format="newick",
                           label_format="ot:ottId")
        jetz_tree = Tree.get(string=jetz.response_dict['content'].decode(),
                             schema='newick',
                             suppress_internal_node_taxa=True,
                             suppress_leaf_node_taxa=True)
        tips = [tip.label for tip in jetz_tree.leaf_node_iter()]
        ott_ids = set()
        for tip in tips:
            try:
                ott_ids.add(int(tip))
            except (TypeError, ValueError):
                # Labels that are missing or not integers are skipped on
                # purpose; anything else should surface as a real failure.
                pass
        ret = taxonomy_helpers.labelled_induced_synth(ott_ids=list(ott_ids),
                                                      label_format='name')
        tips = [
            tip.taxon.label for tip in ret['labelled_tree'].leaf_node_iter()
            if tip.taxon
        ]
        assert len(tips) == 6624

        ret = taxonomy_helpers.labelled_induced_synth(
            ott_ids=list(ott_ids), label_format='name_and_id')
        nodes = [
            node.taxon.label for node in ret['labelled_tree'] if node.taxon
        ]
        assert 'MRCA of taxa in Amazona auropalliata_ott1118 Amazona oratrix_ott1119' in nodes, nodes
Exemple #14
0
 def test_bootstraps_in_annotated_tree_alongside_empty_taxa(self):
     '''A quoted internal label carrying both a bootstrap and a taxon
     ('0.2:tax') gives 'tax' to the tips below it; the other tips get an
     empty taxonomy.'''
     # assertEqual replaces the deprecated alias assertEquals (removed in Python 3.12).
     self.assertEqual({u'a': [],
                       u'b': [],
                       u'c': ['tax'],
                       u'd': ['tax']},
                      TaxonomyExtractor().taxonomy_from_annotated_tree(
                          Tree.get(data="(a,(b,(c,d:0.2)'0.2:tax')0.01973:0.9)root;", schema='newick')))
Exemple #15
0
def main(args):
    """Read the Newick tree at ``args.input``, prune clock outliers, write to ``args.output``.

    ``args.iqd`` and ``args.clock_rate`` are forwarded to
    ``prune_clock_outliers``; underscores in labels are preserved on read.
    """
    pruned = prune_clock_outliers(
        Tree.get(path=args.input, schema="newick", preserve_underscores=True),
        iqd=args.iqd,
        clock_rate=args.clock_rate,
    )
    pruned.write(path=args.output, schema="newick")
Exemple #16
0
 def test_input_unrooted_tree(self):
     '''Create a package from an unrooted tree and check the packaged tree has 21 leaves.'''
     otu61 = os.path.join(path_to_data, '61_otus.gpkg', '61_otus.refpkg')
     # The previously created NamedTemporaryFile was never used and is dropped.
     with tempdir.TempDir() as tmp:
         Create(prerequisites).main(
             taxtastic_taxonomy=os.path.join(otu61,
                                             '61_otus_taxonomy.csv'),
             taxtastic_seqinfo=os.path.join(otu61,
                                            '61_otus_seqinfo.csv'),
             # created with newick_utils:
             # nw_prune test/data/61_otus.gpkg/61_otus.refpkg/61_otus.tre 4459468 >test/data/61_otus.without_4459468.tre
             unrooted_tree=os.path.join(path_to_data, 'create',
                                        '61_otus.without_4459468.tre'),
             sequences=os.path.join(path_to_data, 'create',
                                    '61_otus.without_4459468.fasta'),
             alignment=os.path.join(
                 path_to_data, 'create',
                 '61_otus.without_4459468.aln.fasta'),
             prefix=tmp,
             force=True)
         gpkg = GraftMPackage.acquire(tmp)
         # Read the first line inside a context manager so the handle is
         # closed instead of being left to the garbage collector.
         with open(gpkg.reference_package_tree_path()) as tree_file:
             first_line = tree_file.readline()
         tree = Tree.get(schema='newick', data=first_line)
         self.assertEqual(21, len(tree.leaf_nodes()))
Exemple #17
0
def ete_to_dendropy(tree):
    """Convert *tree* into a DendroPy tree plus its character matrix.

    The character matrix comes from ``ete_to_dendropy_cm(tree)``; the tree is
    serialised with ``tree.write(format=1)`` and re-parsed as Newick into the
    matrix's taxon namespace.

    Returns ``(dendropy_tree, char_matrix)``.
    """
    from dendropy import Tree as DTree
    matrix = ete_to_dendropy_cm(tree)
    namespace = matrix.taxon_namespace
    newick_text = tree.write(format=1)
    converted = DTree.get(schema='newick',
                          data=newick_text,
                          taxon_namespace=namespace)
    return converted, matrix
Exemple #18
0
def test_yule(script_runner, execution_number, datadir):
    """Run ``tact_add_taxa --yule`` and then ``tact_check_results`` on the stem2 data.

    Both commands must exit with return code 0. The resulting tree is also
    written to stderr as an ASCII plot.

    Returns ``(tacted, taxed, bbone)``: the output tree, the taxonomy tree and
    the backbone tree.
    """
    backbone = os.path.join(datadir, "stem2.backbone.tre")
    taxonomy = os.path.join(datadir, "stem2.taxonomy.tre")
    taxed = Tree.get(path=taxonomy, schema="newick")
    bbone = Tree.get(path=backbone, schema="newick", rooting="default-rooted")

    add_taxa_cmd = [
        "tact_add_taxa",
        "--taxonomy", taxonomy,
        "--backbone", backbone,
        "--output", ".tact-pytest-yule",
        "-vv",
        "--yule",
    ]
    add_taxa_result = script_runner.run(*add_taxa_cmd)
    assert add_taxa_result.returncode == 0

    output = ".tact-pytest-yule.newick.tre"
    tacted = Tree.get(path=output, schema="newick", rooting="default-rooted")
    ascii_plot = tacted.as_ascii_plot()
    sys.stderr.write(ascii_plot)

    check_cmd = [
        "tact_check_results", output,
        "--taxonomy", taxonomy,
        "--backbone", backbone,
        "--output", ".tact-pytest-yule.check.csv",
        "--cores=1",
    ]
    check_result = script_runner.run(*check_cmd)
    assert check_result.returncode == 0
    return (tacted, taxed, bbone)
def remove_branch_lengths(f, out):
    """Copy the Newick tree in file *f* to file *out* with all edge lengths removed.

    Parameters
    ----------
    f : path of the Newick file to read.
    out : path of the file to write; it is created or truncated.
    """
    # Context managers close both handles, so the output is flushed to disk
    # before the function returns.
    with open(f, 'r') as tree_file:
        t = Tree.get(file=tree_file, schema="newick")

    for e in t.edges():
        e.length = None

    with open(out, 'w+') as out_file:
        t.write(file=out_file, schema="newick")
Exemple #20
0
 def test_branch_lengths(self):
     '''https://github.com/geronimp/graftM/issues/192'''
     taxes = TaxonomyExtractor().taxonomy_from_annotated_tree(
         Tree.get(path=os.path.join(path_to_data, 'create', 'sulfitereductase.ben.tree'), schema='newick'))
     # assertEqual replaces the deprecated alias assertEquals (removed in Python 3.12).
     self.assertEqual([u'Aanerobic sulfite reductase asrC',
                       u'Anaerobic sulfite reductase asrC Group 3',
                       u'Unknown alpha and beta subunits',
                       u'0.856_PFAM_NIR_SIR,NIR_SIR_ferr'], # number is actually in the clade name
                      taxes['T506DRAFT_scaffold00010.10_60~2561511230'])
Exemple #21
0
def scale_tree(f_name, n):
    """Multiply every edge length of the Newick tree in *f_name* by *n* and save a copy.

    Edges without a length are left untouched. The scaled tree is written
    next to the input: ``'.mt'`` is removed from *f_name*, then ``'_'``, the
    string form of *n* (with ``'.'`` replaced by ``'_'``) and ``'.mt'`` are
    appended.
    """
    # Context managers close both handles, so the output is flushed to disk
    # before the function returns.
    with open(f_name, 'r') as tree_file:
        t = Tree.get(file=tree_file, schema="newick", tree_offset=0)

    for e in t.edges():
        if e.length is not None:
            e.length = float(n*float(e.length))

    out_name = f_name.replace('.mt', '') + '_' + str(n).replace('.', '_') + '.mt'
    with open(out_name, 'w+') as out_file:
        t.write(file=out_file, schema="newick")
Exemple #22
0
 def test_remove_sequences_with_named_internal_nodes(self):
     '''Removing two tips from under a named internal node leaves the expected two-tip tree.'''
     cleaner = DendropyTreeCleaner()
     newick = "('Asulf_Archaeoglobus.1_2280~2522125074':7.17,(('Afulgi_764~2528311132':0.0,'CP006577_764~2588253768':0.0):0.0,'AE000782_746~638154502':0.0)'s__Archaeoglobus fulgidus':7.555):1.461;\n"
     tree = Tree.get(data=newick, schema='newick')
     to_remove = ['CP006577_764~2588253768',
                  'Afulgi_764~2528311132']
     cleaner.remove_sequences(tree, to_remove)
     expected = "(Asulf_Archaeoglobus.1_2280~2522125074:7.17,AE000782_746~638154502:7.555):1.461"
     self.assertEqual(expected, str(tree))
Exemple #23
0
    def assert_tree_equal_no_labels_deprecated(self, expected_newick, observed_tree):
        """Assert that two trees have equal string forms once internal labels are cleared.

        ``expected_newick`` is parsed as Newick. The label of every non-leaf
        node is set to None in both trees (``observed_tree`` is modified in
        place) before ``str()`` of each is compared.
        """
        expected_tree = Tree.get(schema='newick', data=expected_newick)

        for tree in (expected_tree, observed_tree):
            for candidate in tree.nodes():
                if candidate.is_leaf():
                    continue
                candidate.label = None

        self.assertEqual(str(expected_tree), str(observed_tree))
Exemple #24
0
    def assert_tree_equal_no_labels_deprecated(self, expected_newick, observed_tree):
        """Assert that two trees have equal string forms once internal labels are cleared.

        ``expected_newick`` is parsed as Newick. The label of every non-leaf
        node is set to None in both trees (``observed_tree`` is modified in
        place) before ``str()`` of each is compared.
        """
        expected_tree = Tree.get(schema='newick', data=expected_newick)

        for tree in (expected_tree, observed_tree):
            for candidate in tree.nodes():
                if candidate.is_leaf():
                    continue
                candidate.label = None

        self.assertEqual(str(expected_tree), str(observed_tree))
Exemple #25
0
 def test_ben_bug(self):
     new_tree_newick = u'(646366661:0.00571,(646777089:0.01427,(2556226606:0.0,2517129521:0.0):0.04312)0.377:0.01170,((650856936:0.01153,(((((646367708:0.01465,(638201361:0.00187,646622935:0.00573)0.940:0.01352)0.988:0.02634,(2519841469:0.06952,(650856136:0.01840,2506713669:0.02486)0.774:0.00888)0.893:0.01193)0.981:0.03778,((649738338:0.07504,(638155665:0.00613,648151945:0.00304)0.995:0.05973)0.884:0.02836,((650752390:0.11644,(2516847065:0.01707,2520801411:0.03442)0.993:0.04619)0.940:0.03278,(640592705:0.14347,637846211:0.11851)0.971:0.04593)0.940:0.03067)0.943:0.03401)0.998:0.06483,(638168675:0.17080,((649738388:0.09935,((((2540854716:0.00325,2553937573:0.00406)1.000:0.09930,(646533023:0.09868,640592823:0.06908)0.951:0.03770)1.000:0.07636,(650872422:0.05527,(650750471:0.05106,(2516847513:0.01440,2520803234:0.02517)0.998:0.05067)0.947:0.03589)0.074:0.01784)0.786:0.03804,(638155700:0.00445,648151981:0.00284)0.894:0.02131)0.995:0.08448)0.999:0.10690,((KYC55281.1:0.28954,(2540666849:0.26647,(2555938320:0.04589,2518907621:0.04631)0.970:0.05624)1.000:0.12340)0.993:0.09723,(((2515321874:0.26529,((637699780:0.01317,(2540563143:0.01361,(638165755:0.01099,638179449:0.01674)0.558:0.00611)0.964:0.01965)1.000:0.10518,(2502870849:0.06989,(648055573:0.14431,(646706666:0.11338,(637960147:0.03570,(2509663319:0.04930,(2519472088:0.03452,2515107634:0.07709)0.639:0.02134)0.957:0.03004)0.316:0.01755)0.809:0.02055)0.323:0.01213)0.991:0.07286)0.974:0.06071)0.997:0.09000,(650797088:0.06590,(639699575:0.03533,2512008957:0.12951)0.779:0.03900)1.000:0.21062)0.685:0.04985,((((640867801:0.08102,(2507462304:0.07476,(643570914:0.08474,((637897753:0.11959,((2509037835:0.14386,(648194984:0.08665,(648195418:0.04239,2506476786:0.04237)0.993:0.03477)0.668:0.01876)0.510:0.00983,((((640115295:0.06428,2540643958:0.01655)1.000:0.05043,2540643737:0.02645)0.502:0.01269,640115052:0.02482)0.987:0.04272,(2507147269:0.04181,2507146024:0.06962)0.615:0.01449)0.611:0.01456)0.992:0.03807)0.542:0.02193,((2525334810
:0.02116,640099739:0.01544)0.785:0.00549,(640100248:0.00446,2525335778:0.02444)0.750:0.00227)1.000:0.12583)0.944:0.03489)0.962:0.04171)0.986:0.04499)0.793:0.02738,2509039570:0.05560)1.000:0.18840,(2505968448:0.05750,(2505971857:0.03133,2512783668:0.02305)0.185:0.01848)0.998:0.08344)0.875:0.04885,2518787893:0.16350)0.868:0.04436)1.000:0.14016)0.957:0.05998)0.998:0.08120)0.984:0.05479)0.999:0.07664,(2506713165:0.01408,((650917784:0.03595,640788680:0.07226)0.510:0.02178,(2519842728:0.03972,(646859549:0.04217,(2511672461:0.01672,(640786544:0.03901,(640793336:0.00334,(640165512:0.02037,641283602:0.00189)0.147:0.00210)0.175:0.00497)0.641:0.01093)0.991:0.02914)1.000:0.05940)0.323:0.02509)0.977:0.02843)0.960:0.02127)0.499:0.01743)0.998:0.03986,(638202197:0.00190,(644970377:0.01516,646623830:0.00752)0.678:0.00364)0.903:0.00939)0.626:0.01605);'
     old_tree_newick = u'(((((646366661:0.00564,((2517129521:0,2556226606:0):0.04302,646777089:0.01412)0.499:0.01173)0.999:0.07494,(638202197:0.0019,(644970377:0.01507,646623830:0.0075)0.738:0.00362)0.872:0.00931)0.635:0.01598,650856936:0.01308)0.995:0.04,(2506713165:0.01171,((640788680:0.07255,650917784:0.03571)0.466:0.02189,(2519842728:0.03945,(646859549:0.04217,(2511672461:0.01668,(640786544:0.03894,(640793336:0.00335,(640165512:0.02038,641283602:0.0019)0.155:0.00211)0.174:0.00496)0.668:0.01095)0.987:0.02908)1.000:0.05973)0.285:0.02499)0.985:0.02844)0.967:0.02183)0.668:0.01914,(((646367708:0.01473,(638201361:0.00188,646622935:0.00578)0.947:0.01381)0.981:0.02381,(2519841469:0.06809,(650856136:0.0182,2506713669:0.02508)0.727:0.0089)0.915:0.01182)0.971:0.03729,((649738338:0.07412,(648151945:0.00339,638155665:0.00581)0.999:0.05834)0.847:0.02714,((650752390:0.11368,(2516847065:0.01707,2520801411:0.03449)0.997:0.04731)0.943:0.033,(640592705:0.14071,637846211:0.11886)0.974:0.04714)0.907:0.02899)0.938:0.03341)0.999:0.06584,(638168675:0.16751,((649738388:0.09339,((((2540854716:0.00327,2553937573:0.00411)1.000:0.1001,(640592823:0.06966,646533023:0.10017)0.945:0.03786)0.999:0.07732,(650872422:0.0565,(650750471:0.05131,(2516847513:0.01447,2520803234:0.02528)0.998:0.05086)0.932:0.03598)0.014:0.01864)0.823:0.03947,(648151981:0.00285,638155700:0.0045)0.891:0.01979)0.998:0.08667)0.999:0.11839,((2540666849:0.25749,(2518907621:0.04615,2555938320:0.04332)0.979:0.0624)1.000:0.20869,(((2515321874:0.27886,((637699780:0.01253,(2540563143:0.01408,(638165755:0.01119,638179449:0.01933)0.559:0.0062)0.973:0.01906)1.000:0.10181,(((2509663319:0.05476,(2515107634:0.07727,2519472088:0.03736)0.549:0.02018)0.970:0.02691,(637960147:0.03454,646706666:0.12022)0.328:0.01393)0.935:0.02456,(2502870849:0.07124,648055573:0.13944)0.419:0.01578)0.997:0.07265)0.976:0.05902)0.992:0.09155,(650797088:0.06773,(639699575:0.03844,2512008957:0.12921)0.700:0.03684)1.000:0.19382)0.774:0.0559,((2518787893:0.1617,(251
2783668:0.01562,(2505971857:0.02991,2505968448:0.06931)0.820:0.01544)1.000:0.10132)0.000:0.03917,((640867801:0.07687,(2507462304:0.07769,(((637897753:0.11989,((2509037835:0.14075,(648194984:0.08661,(648195418:0.04254,2506476786:0.04232)0.986:0.0348)0.722:0.0191)0.553:0.00998,((((640115295:0.06424,2540643958:0.01643)1.000:0.05042,2540643737:0.02653)0.542:0.01245,640115052:0.0251)0.986:0.04265,(2507146024:0.06963,2507147269:0.0417)0.641:0.01435)0.611:0.01449)0.989:0.03824)0.424:0.02187,((640099739:0.01547,2525334810:0.02122)0.833:0.00545,(640100248:0.00445,2525335778:0.02442)0.761:0.0023)1.000:0.12528)0.944:0.03961,643570914:0.0938)0.959:0.03885)0.984:0.04627)0.758:0.02972,2509039570:0.05614)1.000:0.19872)0.185:0.04422)0.999:0.13795)0.646:0.06267)0.977:0.07517)0.979:0.05715);'
     old_tree = Tree.get(schema='newick', data=old_tree_newick)
     tree_to_reroot = Tree.get(schema='newick', data=new_tree_newick)
     r = Rerooter()
     reann = Reannotator()
     
     new_tree = r.reroot_by_tree( 
         r.reroot(old_tree),
         r.reroot(tree_to_reroot))
     
     expected_lefts = old_tree.seed_node.child_nodes()[0].leaf_nodes()
     expected_rights = old_tree.seed_node.child_nodes()[1].leaf_nodes()
     for tip in expected_lefts:
         self.assertTrue(tip.taxon.label in [t.taxon.label for t in new_tree.seed_node.child_nodes()[1].leaf_nodes()])
     for tip in expected_rights:
         self.assertTrue(tip.taxon.label in [t.taxon.label for t in new_tree.seed_node.child_nodes()[0].leaf_nodes()])
     self.assertEqual(len(tree_to_reroot.leaf_nodes()), len(new_tree.leaf_nodes()))
Exemple #26
0
def main():
    """Print paired node labels for the RAxML trees found in two directories.

    The two directories are taken from ``sys.argv[1]`` and ``sys.argv[2]``.
    Each is searched for ``*/RAxML_bipartitions.bipart`` files; the sorted
    file lists must have the same length and are walked in lockstep.  For
    every pair, the ``tostr`` renderings of both trees must be equal and both
    files must sit in identically named parent directories.  One line is
    printed per pair of node labels, prefixed with that directory name.
    """
    first_dir, second_dir = sys.argv[1], sys.argv[2]
    # Header row naming the two compared directories.
    print('og {} {}'.format(basename(first_dir), basename(second_dir)))

    pattern = ('*', 'RAxML_bipartitions.bipart')
    first_paths = sorted(glob(join(first_dir, *pattern)))
    second_paths = sorted(glob(join(second_dir, *pattern)))
    assert len(first_paths) == len(second_paths)

    for path_a, path_b in zip(first_paths, second_paths):
        tree_a = Tree.get(path=path_a, schema='newick')
        tree_b = Tree.get(path=path_b, schema='newick')
        assert tostr(tree_a) == tostr(tree_b)

        # The parent directory name identifies the group the tree belongs to.
        og_a = basename(dirname(path_a))
        og_b = basename(dirname(path_b))
        assert og_a == og_b

        for label_a, label_b in zip(get_node_labels(tree_a),
                                    get_node_labels(tree_b)):
            print(og_a, label_a, label_b)
def run_tact(script_runner, datadir, stem):
    """Run ``tact_add_taxa`` then ``tact_check_results`` for one dataset.

    Parameters
    ----------
    script_runner
        Object exposing ``run(*args)`` whose result has a ``returncode``.
    datadir : str
        Directory holding ``<stem>.backbone.tre`` and ``<stem>.taxonomy.tre``.
    stem : str
        Dataset name; also used to build the ``.tact-pytest-<stem>`` outputs.

    Returns
    -------
    tuple
        ``(tacted, taxed, bbone)``: the tree written by ``tact_add_taxa``,
        the taxonomy tree and the backbone tree.

    Both script invocations are asserted to exit with return code 0.
    """
    taxonomy = os.path.join(datadir, stem + ".taxonomy.tre")
    backbone = os.path.join(datadir, stem + ".backbone.tre")
    prefix = ".tact-pytest-" + stem

    taxed = Tree.get(path=taxonomy, schema="newick")
    bbone = Tree.get(path=backbone, schema="newick")

    add_result = script_runner.run(
        "tact_add_taxa",
        "--taxonomy", taxonomy,
        "--backbone", backbone,
        "--output", prefix,
        "-vv")
    assert add_result.returncode == 0

    output = prefix + ".newick.tre"
    tacted = Tree.get(path=output, schema="newick")
    # Dump an ASCII rendering of the result to stderr for debugging.
    sys.stderr.write(tacted.as_ascii_plot())

    check_result = script_runner.run(
        "tact_check_results", output,
        "--taxonomy", taxonomy,
        "--backbone", backbone,
        "--output", prefix + ".check.csv",
        "--cores=1")
    assert check_result.returncode == 0
    return (tacted, taxed, bbone)
Exemple #28
0
def write_and_read_nexus(filename, header, tree_id, tree_str):
    """Write a one-tree nexus file and read it back as a DendroPy tree.

    Parameters
    ----------
    filename : str
        Path of the file to (over)write and then parse.
    header : list of str
        Lines written verbatim before the tree statement.
    tree_id : str
        Inserted between ``"tree "`` and the tree text.
    tree_str : str
        Remainder of the tree statement.

    Returns
    -------
    The tree parsed from ``filename`` with a case-sensitive taxon namespace.
    """
    namespace = TaxonNamespace(is_case_sensitive=True)
    # Write a temporary file holding the header followed by the tree line.
    with open(filename, "w") as handle:
        tree_statement = "tree " + tree_id + " " + tree_str
        handle.writelines(entry + "\n" for entry in header + [tree_statement])
    # Read the file back as a DendroPy tree.
    return Tree.get(path=filename,
                    schema="nexus",
                    taxon_namespace=namespace,
                    case_sensitive_taxon_labels=True,
                    suppress_internal_node_taxa=False)
Exemple #29
0
def main(OT_filehandle, OTTs_to_keep, outfile):
    """Prune a newick tree down to the requested nodes and their ancestors.

    Parameters
    ----------
    OT_filehandle
        Open stream containing the newick tree.
    OTTs_to_keep
        Collection passed to ``node_label_in`` to decide whether a node is
        wanted.
    outfile
        Open file object the pruned tree is written to (newick).

    Nodes are visited in postorder.  A node that is wanted, or that was
    already flagged by one of its descendants, flags its parent with a
    ``keep`` attribute; every other node is detached from its parent.
    If nothing at all is kept, the root itself is left in place (it has no
    parent to be detached from) instead of raising ``AttributeError``.
    """
    # Read in the tree, but don't create taxa (faster).
    tree = Tree.get(stream=OT_filehandle, schema="newick", suppress_leaf_node_taxa=True)
    for node in tree.postorder_node_iter():
        if hasattr(node, 'keep') or node_label_in(node, OTTs_to_keep):
            if node.parent_node:  # this is not the root
                node.parent_node.keep = True
        elif node.parent_node is not None:
            # Unwanted and no kept descendant: drop the whole subtree.
            # The root has no parent, so it cannot be removed this way.
            node.parent_node.remove_child(node, suppress_unifurcations=False)
    tree.write(file=outfile, schema='newick', suppress_leaf_node_labels=False)
Exemple #30
0
def root_tree(f_name, out):
    """Midpoint-root the newick tree in ``f_name`` and write it to ``out``.

    Parameters
    ----------
    f_name : str
        Path of the input newick file (read as a force-rooted tree).
    out : str
        Path of the output newick file; it is created or overwritten.

    The output is written without rooting tokens and with edge lengths
    formatted as ``12.8f``.
    """
    t = Tree.get(path=f_name, schema="newick", rooting='force-rooted')

    t.reroot_at_midpoint()

    # ``write(path=...)`` opens and closes the destination itself, so no
    # separate file handle on ``out`` is needed (the previous extra handle
    # was never written to and leaked if the write raised).
    t.write(path=out,
            schema="newick",
            suppress_rooting=True,
            real_value_format_specifier="12.8f")
def get_bipart(ts, species):
    """Encode the split below the node labelled ``#1`` as a character string.

    Parameters
    ----------
    ts : str
        Newick text of the tree; it must contain a node labelled ``'#1'``.
    species
        Sized iterable of taxon labels fixing the order of the output
        characters.

    Returns
    -------
    str
        One character per entry of ``species``: ``'1'`` if the label is a
        leaf under the ``#1`` node, ``'0'`` if it is any other leaf of the
        tree, ``'?'`` if it is absent from the tree.  The string and its
        per-character ``neg`` counterpart are compared and the smaller of
        the two is returned.
    """
    tree = Tree.get(data=ts, schema='newick')
    marked = tree.find_node(lambda node: node.label == '#1')
    inside = {leaf.taxon.label for leaf in marked.leaf_iter()}
    present = {leaf.taxon.label for leaf in tree.leaf_node_iter()}

    def flag(name):
        if name in inside:
            return '1'
        if name in present:
            return '0'
        return '?'

    forward = ''.join(map(flag, species))
    flipped = ''.join(map(neg, forward))

    assert forward != flipped
    assert len(forward) == len(flipped) and len(forward) == len(species)
    return min(forward, flipped)
Exemple #32
0
    def assert_tree_equal_no_labels(self, expected_newick, observed_tree):
        '''Assert that two trees match once internal node labels are ignored.

        Parameters
        ----------
        expected_newick: str
            Newick text of the expected tree (parsed as force-rooted).
        observed_tree:
            Tree object to compare; it is modified in place.

        Both trees have the labels of all internal nodes cleared and missing
        internal edge lengths set to 0.0, then their string forms after
        ``self.sort_tree`` are compared.
        '''
        expected = Tree.get(data=expected_newick, schema='newick', rooting='force-rooted')

        def prep_tree(tree):
            for n in tree.internal_nodes():
                n.label = None
                # Normalise every internal edge, not just the last one
                # visited (the check used to sit outside the loop).
                if n.edge.length is None:
                    n.edge.length = 0.0
            self.sort_tree(tree)

        prep_tree(expected)
        prep_tree(observed_tree)

        self.assertEqual(str(self.sort_tree(expected)), str(self.sort_tree(observed_tree)))
Exemple #33
0
    def assert_tree_equal_no_labels(self, expected_newick, observed_tree):
        '''Assert that two trees match once internal node labels are ignored.

        Parameters
        ----------
        expected_newick: str
            Newick text of the expected tree (parsed as force-rooted).
        observed_tree:
            Tree object to compare; it is modified in place.

        Both trees have the labels of all internal nodes cleared and missing
        internal edge lengths set to 0.0, then their string forms after
        ``self.sort_tree`` are compared.
        '''
        expected = Tree.get(data=expected_newick, schema='newick', rooting='force-rooted')

        def prep_tree(tree):
            for n in tree.internal_nodes():
                n.label = None
                # Normalise every internal edge, not just the last one
                # visited (the check used to sit outside the loop).
                if n.edge.length is None:
                    n.edge.length = 0.0
            self.sort_tree(tree)

        prep_tree(expected)
        prep_tree(observed_tree)

        self.assertEqual(str(self.sort_tree(expected)), str(self.sort_tree(observed_tree)))
Exemple #34
0
    def test_write_fasttree_newick(self):
        '''Check the cleaned newick output for a series of input trees.

        Each case pairs an input newick string with the exact text expected
        from ``self.clean`` when given a ``DendropyTreeCleaner`` and the
        parsed tree.
        '''
        tc = DendropyTreeCleaner()
        cases = [
            # Root label is dropped.
            ("((a,b),(d,e))root;",
             "((a,b),(d,e));\n"),
            # Internal labels should be removed.
            ("((a_2,b)c,(d,e)f)root;",
             "((a_2,b),(d,e));\n"),
            # Quoted spaces should become underscores.
            ("(('a 2',b),(d,e))root;",
             "((a_2,b),(d,e));\n"),
            # Test underscores that are quoted.
            ("(('a_2',b),(d,e))root;",
             "((a_2,b),(d,e));\n"),
            # Test dashes
            ("((ANME-2dV10_01644,b),(d,e))root;",
             "((ANME-2dV10_01644,b),(d,e));\n"),
            # A more real world example with '~' characters (which never mattered actually).
            ("('Asulf_Archaeoglobus.1_2280~2522125074':7.17,(('Afulgi_764~2528311132':0.0,'CP006577_764~2588253768':0.0):0.0,'AE000782_746~638154502':0.0)'s__Archaeoglobus fulgidus':7.555):1.461;\n",
             "(Asulf_Archaeoglobus.1_2280~2522125074:7.17,((Afulgi_764~2528311132:0.0,CP006577_764~2588253768:0.0):0.0,AE000782_746~638154502:0.0):7.555):1.461;\n"),
        ]
        for newick, cleaned in cases:
            tree = Tree.get(data=newick, schema='newick')
            self.assertEqual(cleaned, self.clean(tc, tree))
Exemple #35
0
    def test_reroot_trifurcated_tree_at_longest_child(self):
        '''Reroot three trifurcating trees and compare with expected newick.

        In each input the root has three children, one of which sits on a
        0.5-length branch; in the matching expected tree that branch is split
        into two 0.25-length root edges.
        '''
        input_newicks = (
            u'(A:0.1,B:0.2,(C:0.3,D:0.4):0.5);',
            u'(A:0.5,B:0.2,(C:0.3,D:0.4):0.1);',
            u'(A:0.2,B:0.5,(C:0.3,D:0.4):0.1);',
        )
        expected_newicks = (
            u"((C:0.3,D:0.4):0.25,(A:0.1,B:0.2):0.25);",
            u"(A:0.25,(B:0.2,(C:0.3,D:0.4):0.1):0.25);",
            u"(B:0.25,(A:0.2,(C:0.3,D:0.4):0.1):0.25);",
        )

        test_trees = [Tree.get(schema='newick', data=newick)
                      for newick in input_newicks]
        expected = [str(Tree.get(schema='newick', data=newick))
                    for newick in expected_newicks]
        rerooted = [str(Rerooter().reroot(tree)).strip()
                    for tree in test_trees]

        for observed, wanted in zip(rerooted, expected):
            self.assertEqual(observed, wanted)
Exemple #36
0
def runProgram(referenceTreeFile,
               sampleTreeList,
               bootstrap_cutoff_value=80,
               output_tree="output_tree.tre",
               verbose=False,
               quiet=False,
               timing=False):
    """Label a reference tree with quartet support from a set of sample trees.

    Parameters
    ----------
    referenceTreeFile : str
        Path to the reference tree in newick format.
    sampleTreeList
        Passed to ``readTrees`` together with the reference taxon namespace.
    bootstrap_cutoff_value : int
        Forwarded to ``buildFullSupport``.
    output_tree : str
        Forwarded to ``buildLabeledTree`` as the output location.
    verbose : bool
        Print the inputs and the full quartet dictionary.  The dictionary
        dump is suppressed when ``timing`` is set.
    quiet, timing : bool
        Forwarded to the helper functions.

    The process exits via ``sys.exit()`` if the reference tree cannot be
    read.  The function returns early, after printing a message, if any
    sample tree has taxon labels missing from the reference namespace.
    """
    if verbose:
        print("Reference Tree: ", referenceTreeFile)
        print("Sample Tree List: ", sampleTreeList)
        print("Bootstrap Cutoff Value: ", bootstrap_cutoff_value)
        print("Output Tree File: ", output_tree)

    if timing:
        verbose = False

    try:
        reference_tree = Tree.get(path=referenceTreeFile,
                                  schema="newick",
                                  preserve_underscores=True)
    except Exception:
        # Only ordinary errors are reported as a bad input file;
        # KeyboardInterrupt and SystemExit are allowed to propagate.
        print(
            "Error with file '{}': please only use files with newick tree format"
            .format(referenceTreeFile))
        # NOTE(review): exits with status 0 although this is an error path.
        sys.exit()

    reference_tree_namespace = reference_tree.taxon_namespace

    sample_tree_list = readTrees(sampleTreeList, reference_tree_namespace,
                                 quiet)

    # Check if gene tree taxon namespace matches reference tree
    for s in sample_tree_list:
        if not reference_tree_namespace.has_taxa_labels(
                s.taxon_namespace.labels()):
            print(
                'Error: reference tree is of a different taxon namespace as the sample trees'
            )
            return

    full_quartet_dictionary = buildFullSupport(sample_tree_list,
                                               bootstrap_cutoff_value, verbose,
                                               quiet, timing)
    if verbose:
        print("Full quartet dictionary with support values")
        for quartet in full_quartet_dictionary:
            print(quartet, full_quartet_dictionary[quartet])
        print()
    buildLabeledTree(referenceTreeFile, full_quartet_dictionary, output_tree,
                     quiet, timing)
Exemple #37
0
def summary_to_nw_str(mcmc_tree_filename):
    """Summarise an MCMC tree file and return the summary tree as newick.

    ``calc_summary_tree`` is run on ``mcmc_tree_filename`` first; the nexus
    file ``<mcmc_tree_filename>_summary.tree`` is then parsed with a
    case-sensitive taxon namespace and serialised to newick without
    annotations.

    Returns
    -------
    str
        The newick text with its first five characters and any trailing
        newlines removed.
    """
    calc_summary_tree(mcmc_tree_filename)

    # Convert the summary nexus tree to newick for ete3.
    namespace = TaxonNamespace(is_case_sensitive=True)
    summary_path = mcmc_tree_filename + "_summary.tree"
    summary_tree = Tree.get(path=summary_path,
                            schema="nexus",
                            taxon_namespace=namespace,
                            case_sensitive_taxon_labels=True,
                            suppress_internal_node_taxa=False)

    # Drop all annotations and illegal characters.
    # NOTE(review): the 5-character prefix is presumably a rooting token
    # such as "[&R] " -- confirm.
    newick = summary_tree.as_string('newick', suppress_annotations=True)
    return newick[5:].rstrip("\n")
Exemple #38
0
    def test_joel_bug(self):
        '''Reroot ``tree70`` using ``tree67`` as the reference tree.

        ``tree67`` begins with a bracketed comment block ahead of the newick
        text.  After ``Rerooter().reroot_by_tree(old_tree, tree_to_reroot)``
        the test checks that every leaf label under the first root child of
        the reference is found under the second root child of the result
        (and vice versa), and that the leaf count of the rerooted tree is
        unchanged.
        '''
        tree67 = u'''[
Thu Sep 10 15:55:28 2015: Loaded from /srv/projects/graftm/testing_files/testing_graftM/tmp_01_decorate/67_otus.tree
Thu Sep 10 15:56:18 2015: tree_67_otus saved to /srv/projects/graftm/testing_files/testing_graftM/tmp_01_decorate/67_otus.rerooted.tree
]
((((1928988:0.10866,2909029:0.15809):0.03546,((801940:0.10703,(3825327:0.12686,4298210:0.09398):0.07480):0.02560,729293:0.21465):0.01982):0.02058,((426860:0.16275,219508:0.12556):0.02403,((1128285:0.06200,4455990:0.07954):0.07525,(815912:0.12348,(3770699:0.23707,823009:0.09955):0.04225):0.01489):0.01849):0.01531):0.09184,(((2361381:0.22741,(3779572:0.06720,4363260:0.07438):0.01460):0.04187,(((((((734152:0.13251,4091454:0.12251):0.03552,((576962:0.14097,(1145804:0.14124,3106714:0.14895):0.01964):0.01668,(2014493:0.15560,(3192744:0.11018,(202294:0.07263,1138804:0.08032):0.05015):0.01277):0.01187):0.01016):0.01486,4323734:0.15004):0.00053,(759363:0.05430,4459468:0.04835):0.03216):0.01531,4322265:0.12041):0.01024,(4391683:0.11058,(229854:0.07735,(4336814:0.09937,((150571:0.07911,2730777:0.10930):0.04404,((4042859:0.25381,(717487:0.13914,4363563:0.19585):0.02281):0.02587,(((3190878:0.16480,4452949:0.07312):0.05029,(4015030:0.10339,(4438491:0.04779,(2286116:0.08699,(4251079:0.03657,4349225:0.02256):0.01189):0.01091):0.04963):0.01748):0.02917,(3014179:0.16455,(2170497:0.16101,(2107103:0.22406,951205:0.11633):0.02436):0.02574):0.03041):0.01561):0.02862):0.02589):0.01914):0.01811):0.01347):0.01451,((182569:0.14758,4363259:0.07793):0.04894,696036:0.14901):0.01514):0.01624):0.02659,(3761685:0.11278,4423155:0.16503):0.03965):0.09184);
'''
        tree70 = u'((4423550:0.17275,((4091454:0.108,4427993:0.1045)50:0.01575,((123662:0.06599,(3269889:0.12737,(104534:0.06041,734152:0.09136)20:0.00526)80:0.01669)90:0.01398,(300695:0.10755,225636:0.1317)100:0.0405)0:0.01073)40:0.0128)20:0.00782,(4377103:0.09243,((172946:0.08097,1145804:0.08645)100:0.02986,(1941303:0.0953,4332975:0.09505)90:0.00838)100:0.02206)90:0.0272,((((1931714:0.07012,(4322265:0.10071,4343117:0.13235)100:0.01842)100:0.03116,(((759363:0.05402,4459468:0.0433)100:0.02405,(294612:0.14484,2679839:0.1009)90:0.02132)70:0.01331,((((((730039:0.15444,((4015030:0.11176,(4438491:0.04568,(4349225:0.02406,(2286116:0.08501,(4251079:0.02026,4386156:0.01582)80:0.01016)40:0.0097)80:0.0168)100:0.03826)50:0.01397,(4308961:0.10766,4452949:0.05355)90:0.06215)40:0.01455)50:0.01325,(((1718272:0.12738,(150571:0.08502,(699249:0.03117,2730777:0.03253)100:0.06302)70:0.02174)60:0.03847,(((2107103:0.20025,3190878:0.14435)40:0.03601,(1824285:0.10892,3014179:0.14706)30:0.02039)0:0.01309,((3366304:0.09202,951205:0.07509)100:0.05732,2170497:0.16332)90:0.02722)10:0.01937)0:0.01868,(3064426:0.20791,((1837676:0.14477,(4363563:0.14803,4479774:0.10823)90:0.04638)90:0.03766,(4042859:0.2295,717487:0.15674)40:0.01749)20:0.01416)0:0.01063)0:0.03387)100:0.04795,4336814:0.08037)0:0.02958,(346735:0.11193,4391683:0.07639)60:0.00894)0:0.01312,1142178:0.07594)0:0.01881,(229854:0.0646,4460175:0.09289)90:0.02422)20:0.01731)0:0.01339)0:0.00777,(((2984017:0.05634,4340384:0.07722)80:0.03016,(((4371218:0.13005,(1133483:0.08797,3106714:0.09717)90:0.02053)80:0.02174,(3256066:0.08328,4022282:0.11841)90:0.03619)100:0.03392,((202294:0.06795,1138804:0.07777)100:0.05296,(3192744:0.09608,(2014493:0.11684,(180127:0.06532,4417185:0.0713)100:0.03824)100:0.0368)40:0.01663)70:0.00787)50:0.01733)10:0.0083,(222095:0.1391,(288404:0.13004,(4323734:0.07601,4446882:0.06844)60:0.01661)100:0.02863)40:0.01639)0:0.00846)0:0.0135,(((((1133369:0.07769,4336154:0.07979)100:0.11778,(((708774:0.0822,((114724:0.047,82092:0.0
4936)100:0.11526,(201206:0.10329,4423155:0.14181)60:0.03138)40:0.01886)80:0.03209,(202302:0.11673,3761685:0.09059)100:0.02325)90:0.02946,(((576962:0.11188,202459:0.09918)90:0.033,(213358:0.0989,(3390949:0.09853,3726184:0.09836)90:0.03298)90:0.02315)20:0.01425,202949:0.15903)0:0.01188)20:0.02709)10:0.01609,((4323100:0.0982,4409929:0.10612)60:0.01386,((696036:0.11283,(203529:0.18615,202449:0.08377)10:0.02209)30:0.02916,((2361381:0.18808,203220:0.10905)100:0.04166,(4363260:0.07208,(3779572:0.04977,114015:0.13268)70:0.02151)70:0.01055)100:0.04229)0:0.01717)0:0.01634)0:0.00519,(539547:0.12233,(4409453:0.14784,(4363259:0.05689,((268769:0.0594,266521:0.05311)100:0.04977,(182569:0.10314,4463866:0.07165)70:0.01505)100:0.04024)80:0.01602)100:0.05088)20:0.02162)0:0.0112,((573196:0.11279,((((3825327:0.11767,4298210:0.09472)100:0.07495,(836195:0.11165,801940:0.09002)100:0.02232)90:0.0347,((1928988:0.1129,(1129716:0.13293,2909029:0.13959)50:0.01858)70:0.02572,(((815912:0.12176,((219508:0.13512,(426860:0.12643,(202758:0.04748,4344033:0.03692)100:0.11429)90:0.0487)20:0.00791,((823117:0.10669,823009:0.0888)90:0.0381,3770699:0.24911)50:0.02136)40:0.02309)30:0.01326,(4455990:0.05381,(1128285:0.06585,4271527:0.03794)70:0.02727)100:0.06911)10:0.01546,4097115:0.09311)30:0.02142)20:0.01039)20:0.02855,(729293:0.18117,3871866:0.11553)90:0.03599)100:0.15854)20:0.02836,150700:0.13922)20:0.02787)0:0.00717)0:0.00859)100;'
        
        # old_tree is the reference whose root position should be copied.
        old_tree = Tree.get(schema='newick', data=tree67)
        tree_to_reroot = Tree.get(schema='newick', data=tree70)
        new_tree = Rerooter().reroot_by_tree(
            old_tree,
            tree_to_reroot)
        
        # Leaves on either side of the reference root.
        expected_lefts = old_tree.seed_node.child_nodes()[0].leaf_nodes()
        expected_rights = old_tree.seed_node.child_nodes()[1].leaf_nodes()
        # The sides are compared crosswise: reference child 0 against
        # rerooted child 1, and reference child 1 against rerooted child 0.
        for tip in expected_lefts:
            self.assertTrue(tip.taxon.label in [t.taxon.label for t in new_tree.seed_node.child_nodes()[1].leaf_nodes()])
        for tip in expected_rights:
            self.assertTrue(tip.taxon.label in [t.taxon.label for t in new_tree.seed_node.child_nodes()[0].leaf_nodes()])
        # Rerooting must not add or drop leaves.
        self.assertEqual(len(tree_to_reroot.leaf_nodes()), len(new_tree.leaf_nodes()))
Exemple #39
0
def generate_ATT_from_phylesystem(aln,
                                  workdir,
                                  study_id,
                                  tree_id,
                                  phylesystem_loc='api'):
    """Gather tree, alignment and study info into an AlignTreeTax object.

    Taxon labels in the alignment are forced to otu ids.

    Parameters
    ----------
    aln
        A DendroPy ``DnaCharacterMatrix``; its taxon labels are rewritten
        in place.
    workdir
        Passed through to ``AlignTreeTax``.
    study_id, tree_id
        Identify the study (fetched with ``get_nexson``) and the tree
        within it.
    phylesystem_loc
        Passed to ``get_nexson``; defaults to ``'api'``.

    Returns
    -------
    The ``AlignTreeTax`` built from the tree's newick text, the otu
    dictionary, the alignment and the ingroup MRCA.
    """
    # TODO CHECK ARGS
    assert isinstance(aln, datamodel.charmatrixmodel.DnaCharacterMatrix)
    for taxon in aln.taxon_namespace:
        # Force all spaces to underscores.
        taxon.label = taxon.label.replace(" ", "_")

    nexson = get_nexson(study_id, phylesystem_loc)
    ingroup_ott_ids = get_subtree_otus(nexson,
                                       tree_id=tree_id,
                                       subtree_id="ingroup",
                                       return_format="ottid")
    ott_mrca = get_mrca_ott(ingroup_ott_ids)

    schema = PhyloSchema('newick',
                         output_nexml2json='1.2.1',
                         content="tree",
                         tip_label="ot:originalLabel")
    # Very heavy handed; the same replacement has to happen on the
    # alignment side as well (done above).
    newick = extract_tree(nexson, tree_id, schema).replace(" ", "_")
    tre = Tree.get(data=newick,
                   schema="newick",
                   preserve_underscores=True,
                   taxon_namespace=aln.taxon_namespace)

    otu_dict = {}
    orig_lab_to_otu = {}
    treed_taxa = {}
    for otu_id in get_subtree_otus(nexson, tree_id=tree_id):
        record = extract_otu_nexson(nexson, otu_id)[otu_id]
        otu_dict[otu_id] = record
        record['^physcraper:status'] = "original"
        record['^physcraper:last_blasted'] = "1900/01/01"
        original_label = record.get(u'^ot:originalLabel').replace(" ", "_")
        orig_lab_to_otu[original_label] = otu_id
        treed_taxa[original_label] = record.get(u'^ot:ottId')

    # Relabel every alignment taxon with its otu id; labels without a
    # match are only reported on stderr here.
    for taxon in aln.taxon_namespace:
        try:
            taxon.label = orig_lab_to_otu[taxon.label].encode('ascii')
        except KeyError:
            sys.stderr.write("{} doesn't have an otu id. It is being removed from the alignement. This may indicate a mismatch between tree and alignement\n".format(taxon.label))

    # Need to prune tree to seqs and seqs to tree...
    otu_newick = tre.as_string(schema="newick")
    # Newick should be bare, but the alignment should be a DnaCharacterMatrix.
    return AlignTreeTax(otu_newick, otu_dict, aln, ingroup_mrca=ott_mrca, workdir=workdir)
def read_matrix_and_tree(char_file_path,
                         tree_file_path,
                         char_type=DnaCharacterMatrix,
                         char_schema='fasta',
                         tree_schema='newick'):
    """Read an optional character matrix and a tree sharing its taxa.

    Parameters
    ----------
    char_file_path
        Path of the character matrix; if falsy, no matrix is read.
    tree_file_path
        Path of the tree file.
    char_type
        Class whose ``get`` reads the matrix.
    char_schema, tree_schema
        Schemas passed to the respective ``get`` calls.

    Returns
    -------
    tuple
        ``(matrix, tree)``; ``matrix`` is ``None`` when no matrix path was
        given.  When a matrix is read, its taxon namespace is marked
        immutable before the tree is parsed against it.
    """
    matrix = None
    namespace = None
    if char_file_path:
        matrix = char_type.get(path=char_file_path, schema=char_schema)
        namespace = matrix.taxon_namespace
        namespace.is_mutable = False
    tree = Tree.get(path=tree_file_path,
                    schema=tree_schema,
                    preserve_underscores=True,
                    taxon_namespace=namespace)
    return matrix, tree
Exemple #41
0
 def test_input_unrooted_tree(self):
     '''Create a package from an unrooted tree and count the leaves.

     The package is built into a temporary directory from the pruned
     61-OTU data set, then the first line of the package's reference tree
     file is parsed as newick and must contain 21 leaves.
     '''
     otu61 = os.path.join(path_to_data, '61_otus.gpkg', '61_otus.refpkg')
     with tempdir.TempDir() as tmp:
         Create(prerequisites).main(
             taxtastic_taxonomy=os.path.join(otu61, '61_otus_taxonomy.csv'),
             taxtastic_seqinfo=os.path.join(otu61, '61_otus_seqinfo.csv'),
             # created with newick_utils:
             # nw_prune test/data/61_otus.gpkg/61_otus.refpkg/61_otus.tre 4459468 >test/data/61_otus.without_4459468.tre
             unrooted_tree=os.path.join(path_to_data, 'create', '61_otus.without_4459468.tre'),
             sequences=os.path.join(path_to_data, 'create', '61_otus.without_4459468.fasta'),
             alignment=os.path.join(path_to_data, 'create', '61_otus.without_4459468.aln.fasta'),
             prefix=tmp, force=True)
         gpkg = GraftMPackage.acquire(tmp)
         # Close the tree file deterministically instead of leaking the handle.
         with open(gpkg.reference_package_tree_path()) as tree_file:
             tree = Tree.get(schema='newick', data=tree_file.readline())
         self.assertEqual(21, len(tree.leaf_nodes()))
Exemple #42
0
 def generate_streamed_alignment(self):
     """Run the key steps, then replace the tree and alignment with the expanded ones.

     The object is pickled to ``<workdir>/scrape.p`` after reading the
     blast results and again at the end; ``self.data.otu_dict`` is pickled
     to ``<workdir>/otu_dict.p``.  When new sequences survive filtering,
     they are aligned and placed, the tree is re-estimated and reloaded
     from ``RAxML_bestTree.<date>``, and this run's blast directory, RAxML
     and papara files and new-sequence file are moved to
     ``<workdir>/previous_run``.  ``self.repeat`` is set to 1 in that case
     and to 0 when there was nothing new.
     """
     self.read_blast()
     with open('{}/scrape.p'.format(self.workdir), 'wb') as handle:
         pickle.dump(self, handle)
     if len(self.new_seqs) > 0:
         self.remove_identical_seqs()
         self.data.write_files()  # should happen before aligning in case of pruning
         if len(self.new_seqs_otu_id) > 0:  # TODO rename to something more intuitive
             self.write_query_seqs()
             self.align_query_seqs()
             self.data.reconcile()
             self.place_query_seqs()
             self.est_full_tree()
             self.data.tre = Tree.get(path="{}/RAxML_bestTree.{}".format(self.workdir, self.date),
                                      schema="newick",
                                      preserve_underscores=True,
                                      taxon_namespace=self.data.aln.taxon_namespace)
             self.data.write_files()
             previous_run = "{}/previous_run".format(self.workdir)
             if os.path.exists(previous_run):
                 # Archive the earlier run under a name that is not taken yet,
                 # always inside the working directory.
                 prev_dir = "{}/previous_run{}".format(self.workdir, self.date)
                 i = 0
                 while os.path.exists(prev_dir):
                     i += 1
                     prev_dir = "{}/previous_run{}".format(self.workdir, i)
                 os.rename(previous_run, prev_dir)
             os.rename(self.blast_subdir, previous_run)
             # NOTE(review): the marker is only refreshed when it already
             # exists -- confirm this is intended.
             if os.path.exists("{}/last_completed_update".format(self.workdir)):
                 os.rename(self.tmpfi, "{}/last_completed_update".format(self.workdir))
             # Use the file's base name so that working directories with
             # several path components (or absolute paths) are handled.
             for filename in glob.glob('{}/RAxML*'.format(self.workdir)):
                 os.rename(filename, "{}/{}".format(previous_run, os.path.basename(filename)))
             for filename in glob.glob('{}/papara*'.format(self.workdir)):
                 os.rename(filename, "{}/{}".format(previous_run, os.path.basename(filename)))
             os.rename("{}/{}".format(self.workdir, self.newseqs_file), "{}/previous_run/newseqs.fasta".format(self.workdir))
             self.data.write_labelled()
             self.new_seqs = {}  # Wipe for next run
             self.new_seqs_otu_id = {}
             self.repeat = 1
         else:
             sys.stdout.write("No new sequences after filtering.\n")
             self.repeat = 0
     else:
         sys.stdout.write("No new sequences found.\n")
         self.repeat = 0
     self.reset_markers()
     with open('{}/scrape.p'.format(self.workdir), 'wb') as handle:
         pickle.dump(self, handle)
     with open('{}/otu_dict.p'.format(self.workdir), 'wb') as handle:
         pickle.dump(self.data.otu_dict, handle)
Exemple #43
0
 def __init__(self, newick, otu_dict, alignment, ingroup_mrca, workdir):
     """Bundle a tree, an alignment and per-otu metadata.

     Parameters
     ----------
     newick : str
         Newick text of the tree; parsed against the alignment's taxon
         namespace and also kept verbatim as ``orig_newick``.
     otu_dict
         Stored as ``self.otu_dict``.
     alignment
         Object providing ``taxon_namespace``; stored as both ``aln`` and
         ``orig_aln`` (the same object, not a copy).
     ingroup_mrca
         Must convert with ``int()`` to a non-zero value; stored unchanged
         as ``ott_mrca``.
     workdir : str
         Working directory; created if it does not exist.
     """
     self.aln = alignment
     # The tree shares the alignment's taxon namespace.
     self.tre = Tree.get(data=newick,
                         schema="newick",
                         preserve_underscores=True,
                         taxon_namespace=self.aln.taxon_namespace)
     self.otu_dict = otu_dict
     self.ps_otu = 1 #iterator for new otu IDs
     # Runs after aln, tre, otu_dict and ps_otu are set, before the rest.
     self._reconcile_names()
     self.workdir = workdir #TODO - is this where the workdir should live?
     if not os.path.exists(self.workdir):
         os.makedirs(self.workdir)
     # Raises if ingroup_mrca is not int-convertible; fails the assert if it is 0.
     assert int(ingroup_mrca)
     self.ott_mrca = ingroup_mrca
     self.orig_seqlen = [] #FIXME
     self.gi_dict = {}
     self.orig_aln = alignment
     self.orig_newick = newick
def mutable_read_matrix_and_tree(char_file_path,
                                tree_file_path,
                                char_type=DnaCharacterMatrix,
                                char_schema='fasta',
                                tree_schema='newick'):
    '''Read a character matrix and a tree that share one taxon namespace.

    The matrix is read first and its namespace is switched to mutable, so
    the tree can still be parsed when its labels do not all match the
    matrix. Returns ``(char_mat, tree)``, or ``(None, None)`` when
    ``char_file_path`` is empty/None (the tree is not read in that case).
    '''
    if not char_file_path:
        return None, None

    matrix = char_type.get(path=char_file_path, schema=char_schema)
    shared_namespace = matrix.taxon_namespace
    # Mutable namespace: unknown tree labels are added instead of rejected.
    shared_namespace.is_mutable = True
    phylogeny = Tree.get(path=tree_file_path,
                         schema=tree_schema,
                         preserve_underscores=True,
                         taxon_namespace=shared_namespace)
    return matrix, phylogeny
Exemple #45
0
    def test_reroot_trifurcated_tree_at_longest_child(self):
        """Rerooting a trifurcating root splits the longest child edge in half."""
        # (input newick, expected newick after rerooting)
        cases = (
            (u'(A:0.1,B:0.2,(C:0.3,D:0.4):0.5);',
             u"((C:0.3,D:0.4):0.25,(A:0.1,B:0.2):0.25);"),
            (u'(A:0.5,B:0.2,(C:0.3,D:0.4):0.1);',
             u"(A:0.25,(B:0.2,(C:0.3,D:0.4):0.1):0.25);"),
            (u'(A:0.2,B:0.5,(C:0.3,D:0.4):0.1);',
             u"(B:0.25,(A:0.2,(C:0.3,D:0.4):0.1):0.25);"),
        )

        for source_newick, expected_newick in cases:
            source_tree = Tree.get(schema='newick', data=source_newick)
            expected = str(Tree.get(schema='newick', data=expected_newick))
            rerooted = str(Rerooter().reroot(source_tree)).strip()
            self.assertEqual(rerooted, expected)
Exemple #46
0
 def place_query_seqs(self):
     """Place the new query sequences on the tree with RAxML.

     Runs ``raxmlHPC -f v`` inside ``self.workdir`` on
     ``papara_alignment.extended`` and ``random_resolve.tre``, reads the
     resulting ``RAxML_labelledTree.PLACE``, resolves polytomies, strips the
     ``QUERY___`` prefix from taxon labels and writes ``place_resolve.tre``.
     The result is only used for placement, as a starting tree.

     Side effects: temporarily changes the process working directory
     (restored afterwards, also on error) and sets
     ``self._query_seqs_placed`` to 1 on success.
     """
     stale_placement = "RAxML_labelledTree.PLACE"
     if os.path.exists(stale_placement):
         # Move a leftover placement tree aside. The path that was checked is
         # the one renamed (an undefined variable was used here before, which
         # raised NameError whenever the file existed).
         os.rename(stale_placement, "RAxML_labelledTreePLACE.tmp")
     sys.stdout.write("placing query sequences \n")
     # Remember where we started: '..' is only the starting directory when
     # workdir is a single-level relative path.
     start_dir = os.getcwd()
     os.chdir(self.workdir)
     try:
         # NOTE(review): the RAxML exit status is not checked; a failed run
         # surfaces as an error when the output tree is read below.
         subprocess.call(["raxmlHPC", "-m", "GTRCAT",
                          "-f", "v",
                          "-s", "papara_alignment.extended",
                          "-t", "random_resolve.tre",
                          "-n", "PLACE"])
         placetre = Tree.get(path="RAxML_labelledTree.PLACE",
                             schema="newick",
                             preserve_underscores=True)
         placetre.resolve_polytomies()
         for taxon in placetre.taxon_namespace:
             if taxon.label.startswith("QUERY"):
                 taxon.label = taxon.label.replace("QUERY___", "")
         placetre.write(path="place_resolve.tre", schema="newick", unquoted_underscores=True)
     finally:
         os.chdir(start_dir)
     self._query_seqs_placed = 1
Exemple #47
0
 def write_labelled(self, label='^ot:ottTaxonName', treepath="labelled.tre", alnpath="labelled.fas"):
     """Write the tree and alignment with human-readable, unique labels.

     Each taxon is relabelled with the first non-empty value among
     ``otu_dict[otu][label]``, ``otu_dict[otu]['^ot:originalLabel']`` and
     ``'ncbi <^ncbi:taxon>'``; taxa with none of these keep their label.
     A label that was already handed out gets the OTU id appended.
     Works on serialised copies of the tree and alignment, so it is
     NOT MEMORY EFFICIENT AT ALL.

     ``treepath`` and ``alnpath`` are file names inside ``self.workdir``.
     """
     assert label in ['^ot:ottTaxonName', "^ot:originalLabel", "^ot:ottId", "^ncbi:taxon"]
     # Round-trip through strings so the relabelling never touches self.tre / self.aln.
     tmp_tre = Tree.get(data=self.tre.as_string(schema="newick"),
                        schema="newick",
                        preserve_underscores=True)
     tmp_aln = DnaCharacterMatrix.get(data=self.aln.as_string(schema="fasta"),
                                      schema="fasta",
                                      taxon_namespace=tmp_tre.taxon_namespace)
     taken = set()
     for taxon in tmp_tre.taxon_namespace:
         otu_id = taxon.label
         record = self.otu_dict[otu_id]
         chosen = record.get(label) or record.get("^ot:originalLabel")
         if not chosen:
             ncbi_id = record.get("^ncbi:taxon")
             if not ncbi_id:
                 # Nothing better available: leave the OTU id as the label.
                 continue
             chosen = " ".join(["ncbi", str(ncbi_id)])
         if chosen in taken:
             chosen = " ".join([chosen, otu_id])
         taken.add(chosen)
         taxon.label = chosen
     tmp_tre.write(path="{}/{}".format(self.workdir, treepath),
                   schema="newick",
                   unquoted_underscores=True,
                   suppress_edge_lengths=False)
     tmp_aln.write(path="{}/{}".format(self.workdir, alnpath),
                   schema="fasta")
Exemple #48
0
    def test_write_fasttree_newick(self):
        """write_fasttree_newick drops internal labels and unquotes leaf names."""
        tc = DendropyTreeCleaner()
        # (input newick, expected FastTree-style output)
        cases = [
            ("((a,b),(d,e))root;",
             "((a,b),(d,e));\n"),
            # Internal labels should be removed.
            ("((a_2,b)c,(d,e)f)root;",
             "((a_2,b),(d,e));\n"),
            # Quoted spaces should become underscores.
            ("(('a 2',b),(d,e))root;",
             "((a_2,b),(d,e));\n"),
            # Test underscores that are quoted.
            ("(('a_2',b),(d,e))root;",
             "((a_2,b),(d,e));\n"),
            # Test dashes
            ("((ANME-2dV10_01644,b),(d,e))root;",
             "((ANME-2dV10_01644,b),(d,e));\n"),
            # A more real world example with '~' characters (which never mattered actually).
            (u"('Asulf_Archaeoglobus.1_2280~2522125074':7.17,(('Afulgi_764~2528311132':0.0,'CP006577_764~2588253768':0.0):0.0,'AE000782_746~638154502':0.0)'s__Archaeoglobus fulgidus':7.555):1.461;\n",
             "(Asulf_Archaeoglobus.1_2280~2522125074:7.17,((Afulgi_764~2528311132:0.0,CP006577_764~2588253768:0.0):0.0,AE000782_746~638154502:0.0):7.555):1.461;\n"),
        ]

        for newick, expected in cases:
            tree = Tree.get(data=newick, schema='newick')
            sink = StringIO()
            tc.write_fasttree_newick(tree, sink)
            self.assertEqual(expected, sink.getvalue())
Exemple #49
0
def warn(*objs):
    """Print the given objects to standard error, like ``print`` does."""
    stream = sys.stderr
    print(*objs, file=stream)

# Build a dict mapping OTTid -> popularity metric from the tab-separated
# popularity file given on the command line.
popularity = {};
tsvin = csv.DictReader(args.popularity_file, delimiter='\t')
# Every column whose name contains 'pagecounts' holds one page-view count.
viewcols = [col for col in tsvin.fieldnames if 'pagecounts' in col]
for row in tsvin:
    try:
        # Keep only non-empty, purely numeric view counts.
        views = [int(row[col]) for col in viewcols if row[col] and row[col].isdigit()]
        # Trimmed mean: the two largest counts are dropped before averaging.
        # With fewer than three counts the list is empty (presumably making
        # statistics.mean raise StatisticsError, which skips the row below).
        trMeanViews = mean(sorted(views)[:-2])
        popularity[row["OTTid"]] = (float(row["page_size"]) * trMeanViews)**0.5 #take the sqrt transform
    except (StatisticsError, ValueError):   #perhaps data is absent, a number is NA or we are trying to take a mean of an empty list - if so, ignore
        pass;

# Leaf labels stay as node labels (no taxa are created) and edge lengths
# in the input are ignored.
tree = Tree.get(file=args.intree, schema='newick', suppress_edge_lengths=True, preserve_underscores=True, suppress_leaf_node_taxa=True)

# Store each node's popularity as its edge length; excluded nodes and nodes
# without a usable '_ott<id>' label or popularity entry get 0.
for node in tree.preorder_node_iter():
    if node.label in args.exclude:
        node.edge_length = 0
    else:
        try:
            # The text after the last '_ott' in the label is the lookup key.
            node.edge_length = popularity[node.label.rsplit("_ott",1)[1]]
        except (LookupError, AttributeError):
            node.edge_length = 0

#go up the tree from the tips, summing up the popularity indices beneath
if args.branch_length in ['sum_descendant_popularities', 'sum_ancestor_and_descendant_popularities']:
    for node in tree.postorder_node_iter():
        if node.is_leaf():
def node_label_method(tree, outgroup):
    '''Re-root ``tree`` in place at the parent of the ``outgroup`` leaf.

    Node labels are treated as plain node attributes (default), so they are
    left untouched. Returns the same tree object.
    '''
    tree.reseed_at(tree.find_node_with_taxon_label(outgroup).parent_node)
    return tree

def rooted_bipartition_method(tree, outgroup):
    '''Re-root ``tree`` in place, treating node labels as branch support values.

    Supports are remembered per bipartition before re-rooting (missing labels
    count as 1.0) and written back afterwards; bipartitions that did not
    exist before re-rooting are labelled "not_specified". The new root's edge
    length is cleared. Returns the same tree object.
    '''
    tree.encode_bipartitions()
    support_by_split = {}
    for node in tree:
        raw_label = node.label
        support_by_split[node.bipartition] = 1.0 if raw_label is None else float(raw_label)

    anchor = tree.find_node_with_taxon_label(outgroup).parent_node
    tree.reseed_at(anchor)

    # Bipartitions must be recomputed for the new rooting before the lookup.
    tree.encode_bipartitions()
    for node in tree:
        node.label = support_by_split.get(node.bipartition, "not_specified")
    tree.seed_node.edge.length = None
    return tree

# Demo driver: re-root the tree in 'test.nw' on the parent of taxon 'X',
# carrying support values across, and print the resulting newick string.
# The file is opened in a with-block so the handle is closed after parsing.
with open('test.nw') as newick_file:
    tree = Tree.get(file=newick_file, schema="newick")#, rooting="force-rooted")
rooted_bipartition_method(tree, 'X')
nw = tree.as_string(schema='newick').strip()
# Drop the '[&R] ' rooting token from the output. The single-argument call
# form of print behaves the same on Python 2 and Python 3.
print(nw.replace('[&R] ', ''))

# Related discussion: https://github.com/jeetsukumaran/DendroPy/issues/53
    stops.append(len(seq.values()))


# Pick the median of the collected sequence lengths as the common cut-off.
stops.sort()
stop = stops[int(len(stops)/2)]

# Truncate every original sequence to `stop` characters, keyed by taxon label.
d = {}
for taxon, seq in orig_seq.items():
        d[str(taxon.label)] = seq.values()[:stop]
    

dna_orig = DnaCharacterMatrix.from_dict(d)

dna_taxa = [i for i in dna_orig.taxon_namespace]

# Read the tree into the matrix's taxon namespace so taxa can be compared as sets below.
tre_orig = Tree.get(path = "{}_random_resolve.tre".format("ascomycota"), schema = "newick",taxon_namespace=dna_orig.taxon_namespace)
""
treed_taxa = [i.taxon for i in tre_orig.leaf_nodes()]

# Remove tips that have no sequence in the matrix...
tre_orig.prune_taxa(set(treed_taxa) - set(dna_taxa))

# ...and drop sequences whose taxon is not a tip of the tree.
for taxon in set(dna_taxa) - set(treed_taxa):
	del d[taxon.label]

#####NEXT STEPS!!!

# TODO: move this tree/alignment reconciliation into a function and apply it to the original data as well.
# Rebuild the matrix from the filtered dict.
dna_orig = DnaCharacterMatrix.from_dict(d)

# NOTE(review): `runname` is not defined in the visible part of this script - confirm it is set earlier.
tre_orig.write(path = "{}_orig_cut.tre".format(runname), schema = "newick", unquoted_underscores=True, suppress_edge_lengths=True)
        else:
            onlyfiles.append(AllFiles[j])
    for j in range(len(onlyfiles)):
        try:
            path = file_path+'/'+onlyfiles[j] 
            fil = open('/home/4/u1we1f44/Documents/appbio15/projekt/data/'+path, 'r')
            lines_list=fil.readlines()
            fil.close()
            test = SeqDic(lines_list) # If this dose not worke we do not have a FASTA file
            ##
            # Makes a newick tree and checks if the referense tree is recovered. The none reducing file.
            ##
            line = 'cat /home/4/u1we1f44/Documents/appbio15/projekt/data/'+path+' | fastprot -I fasta -O phylip | fnj -I phylip -O "newick" -o "Treeout.txt"' 
            os.system(line)
            TreePath=file_path+'/'+RefTree   
            t1=Tree.get(file=open('/home/4/u1we1f44/Documents/appbio15/projekt/data/'+TreePath,'r'),schema="newick",tree_offset=0)
            t2=Tree.get(file=open('/home/4/u1we1f44/Documents/appbio15/projekt/src/Treeout.txt','r'),schema="newick",tree_offset=0,taxon_namespace=t1.taxon_namespace)
            t1.encode_bipartitions()
            t2.encode_bipartitions()
            if treecompare.symmetric_difference(t1, t2)==0:
                NotFixedCount += 1
                os.remove('/home/4/u1we1f44/Documents/appbio15/projekt/src/Treeout.txt')
                Total += 1
            else:
                Total += 1
	        os.remove('/home/4/u1we1f44/Documents/appbio15/projekt/src/Treeout.txt')
            ##  
            # Makes a temporary file. In the temporary file with data with the nosie columns remoeved. MAkes a newick tree and checks if the refernse tree is recovered.
            # The nosie columns removed.
            ##
            os.system("touch temp.fa")
Exemple #53
0
    def build_subsets_tree(self, curr_tmp_dir_par,build_min_tree=True):
        """Build a spanning tree whose nodes stand for alignment subset jobs.

        Parameters
        ----------
        curr_tmp_dir_par : str
            Prefix of every job's ``tmp_dir_par``; the first
            ``len(curr_tmp_dir_par) + 1`` characters are cut off to obtain
            the short subset names used as labels.
        build_min_tree : bool
            When true (the default) the tree is produced by
            ``build_groups_MST`` and returned immediately; otherwise the
            heuristic contraction below is used.

        Returns
        -------
        PhylogeneticTree
            The spanning tree over the subsets.

        Raises
        ------
        AssertionError
            Heuristic path only: a node does not end up with exactly one job.
        Exception
            Heuristic path only: two nodes of the final tree share a label.

        Side effects: ``self.pasta_team.subsets`` is replaced by a mapping
        from subset name to alignment job (on both paths).
        """
    # uym2 added: add option for MST
        if build_min_tree:
            _LOG.debug("START building Minimum Spanning Tree")
            grouping = {}
            groupName2jobName = {}
            
            # Map each leaf to its subset name (slashes removed for the
            # grouping) and remember which job each subset name stands for.
            for node in self.tree._tree.leaf_node_iter():
                groupName = self.pasta_team.subsets[node.taxon.label].tmp_dir_par[len(curr_tmp_dir_par)+1:]
                grouping[node.taxon.label] = groupName.replace("/","")
                groupName2jobName[groupName] = self.pasta_team.subsets[node.taxon.label]
            
            subsets_tree = build_groups_MST(self.tree._tree,grouping)
 
            # Put a "/" back in front of every "d" in the labels
            # (presumably restoring the slashes removed above - TODO confirm
            # that subset names contain no other "d").
            for node in subsets_tree.postorder_node_iter():
               if node.is_leaf():
                   node.taxon.label = node.taxon.label.replace("d","/d")
               node.label = node.label.replace("d","/d") 

            self.pasta_team.subsets = groupName2jobName
            MST = PhylogeneticTree(subsets_tree) 
            _LOG.debug("Spanning tree is:\n %s" %MST)
            return MST
    ###################################


        _LOG.debug("START building heuristic spanning tree")

        # translate: leaf label -> short subset name
        # t2:        short subset name -> one-element set holding the job
        translate={}
        t2 = {}
        for node in self.tree._tree.leaf_node_iter():
            nalsj = self.pasta_team.subsets[node.taxon.label]            
            newname = nalsj.tmp_dir_par[len(curr_tmp_dir_par)+1:]
            translate[node.taxon.label] = newname
            t2[newname] = set([nalsj])            
        # Work on a fresh copy parsed from the stored newick string.
        subsets_tree = PhylogeneticTree(Tree.get(data=self.tree_str,schema='newick'))
        for node in subsets_tree._tree.leaf_node_iter():
            node.alignment_subset_job = t2[translate[node.taxon.label]]
            #node.alignment_subset_job = t2[node.taxon]
        del t2
        del translate
        _LOG.debug("leafs labeled")        
        #subsets_tree._tree.infer_taxa()
        #_LOG.debug("fake taxa inferred")                   
        #Then make sure the tree is rooted at a branch (not at a node). 
        if len(subsets_tree._tree.seed_node.child_nodes()) > 2:
            # Pick the first root child with an internal edge; if none is
            # found, `c` is simply the last child after the loop.
            for c in subsets_tree._tree.seed_node.child_nodes():
                if c.edge.is_internal():
                    break
            subsets_tree._tree.is_rooted = True
            subsets_tree._tree.reroot_at_edge(c.edge,length1=c.edge.length/2., 
                                              length2=c.edge.length/2., suppress_unifurcations=False)                        
        _LOG.debug("Subset Labeling (start):\n%s" %str(subsets_tree.compose_newick(suppress_rooting=False))[0:5000])
        #_LOG.debug("Subset Labeling (start):\n%s" %str(len(subsets_tree._tree.seed_node.child_nodes())))
        # Then label internal branches based on their children, and collapse redundant edges. 
        for node in subsets_tree._tree.postorder_internal_node_iter():
            # my label is the intersection of my children, 
            # unless the intersection is empty, in which case it is the union
            if not hasattr(node, "alignment_subset_job") or node.alignment_subset_job is None:
                node.alignment_subset_job = set.intersection(*[c.alignment_subset_job for c in node.child_nodes()])
                if not node.alignment_subset_job:
                    node.alignment_subset_job = set.union(*[c.alignment_subset_job for c in node.child_nodes()])
            # Now go ahead and prune any child whose label encompasses my label. 
            # Use indexing instead of iteration, because with each collapse, 
            # new children can be added, and we want to process them as well.                         
            i = 0;
            while i < len(node.child_nodes()):                                
                c = node.child_nodes()[i]
                if node.alignment_subset_job.issubset(c.alignment_subset_job):
                    # Dendropy does not collapsing and edge that leads to a tip. Remove instead
                    if c.child_nodes():
                        c.edge.collapse()                                    
                    else:
                        node.remove_child(c)                      
                else:
                    # Only advance when nothing was removed: after a removal
                    # index i already points at the next unprocessed child.
                    i += 1
            
            node.label = "+".join(nj.tmp_dir_par[len(curr_tmp_dir_par)+1:] for nj in node.alignment_subset_job)
            # An internal node that lost all its children is now a tip and needs a taxon.
            if node.is_leaf():
                node.taxon = subsets_tree._tree.taxon_namespace.new_taxon(label=node.label)
            
        _LOG.debug("Before final round, the tree is:\n %s" %str(subsets_tree.compose_newick(suppress_rooting=False))[0:5000])
        # Now, the remaining edges have multiple labels. These need to
        # be further resolved. Do it by minimum length
        #   First find all candidate edges that we might want to contract
        candidate_edges = set()
        for e in subsets_tree._tree.postorder_edge_iter():
            if e.tail_node and e.head_node.alignment_subset_job.intersection(e.tail_node.alignment_subset_job):
                candidate_edges.add( (e.length,e) )
        #   Then sort the edges, and start removing them one by one
        #   only if an edge is still having intersecting labels at the two ends                                                    
        # Edges with a missing or zero length sort first (key -1).
        candidate_edges = sorted(candidate_edges, key=lambda x:  x[0] if x[0] else -1)       
        for (el, edge) in candidate_edges:
            I = edge.tail_node.alignment_subset_job.intersection(edge.head_node.alignment_subset_job)
            if I:
                edge.tail_node.alignment_subset_job = I 
                if edge.head_node.child_nodes():
                    #edge.collapse(adjust_collapsed_head_children_edge_lengths=True)
                    edge.collapse()
                else:
                    edge.tail_node.remove_child(edge.head_node)
        # Make sure the tree is correct, remove the actual jobs
        # from nodes (can cause deep-copy problems), assign a label to each
        # node, and keep a mapping between the labels and actual alignment job objects
        self.pasta_team.subsets = {} # Let this now map from subset labels to the actual alignment jobs
        for node in subsets_tree._tree.postorder_node_iter():
            assert len(node.alignment_subset_job) == 1
            nalsj = node.alignment_subset_job.pop()
            node.alignment_subset_job = None 
            node.label = nalsj.tmp_dir_par[len(curr_tmp_dir_par)+1:]#only find last part of the name
            self.pasta_team.subsets[node.label] = nalsj
            if node.is_leaf():
                # Add a dummy taxon, or else dendropy can get confused
                node.taxon = subsets_tree._tree.taxon_namespace.new_taxon(label=node.label)
        #subsets_tree._tree.infer_taxa()
        _LOG.debug("Spanning tree is:\n %s" %subsets_tree)
        # Every subset must appear on exactly one node of the final tree.
        labels = [nd.label for nd in subsets_tree._tree.postorder_node_iter()]
        if len(set(labels)) != len(labels):
            import collections
            raise Exception("Duplicate names found %s" %"\n".join
                   (item for item, count in 
                    collections.Counter(labels).items() if count > 1))
           
        return subsets_tree
from dendropy import Tree


# Node labels of interest, each mapped to a small integer index ('Other' is index 0).
label_nodes = {'Other':0, 'Chloroplastida_ott361838':1, 'Metazoa_ott691846':2, 'Fungi_ott352914':3, 'Bacteria_ott844192':4}
# Filled in later: label -> the tree node carrying that label.
target_nodes = {}
# index -> display name, i.e. the label with its '_ott<digits>' suffix removed.
# The pattern is a raw string so that '\d' reaches the regex engine instead
# of being an invalid string escape.
names = {index:re.sub(r"_ott\d+", "", k) for k, index in label_nodes.items()}

# Command-line interface: one positional argument, which argparse opens for reading.
parser = argparse.ArgumentParser(description='Count the number of unnamed nodes in a tree')
parser.add_argument('treefile', type=argparse.FileType('r'), help='A newick-format tree')

# Arguments are parsed at module level (no __main__ guard is visible here).
args = parser.parse_args()

def warn(*objs):
    """Print the given objects to standard error, like ``print`` does."""
    stream = sys.stderr
    print(*objs, file=stream)

# Leaf names are kept as node labels (no taxa are created) and underscores are preserved.
tree = Tree.get(file=args.treefile, schema='newick', preserve_underscores=True, suppress_leaf_node_taxa=True)

# Annotate every node with n_leaves, the number of leaves at or below it,
# and remember the nodes whose label is one of the groups in label_nodes.
for node in tree.postorder_node_iter():
    if node.is_leaf():
        node.n_leaves = 1
    elif node.label in label_nodes:
        target_nodes[node.label] = node
    # Post-order guarantees a node's own count is complete before it is
    # pushed up to its parent.
    try:
        node._parent_node.n_leaves += node.n_leaves
    except AttributeError:
        # Either the parent has no count yet, or there is no parent at all.
        # Only AttributeError is expected; anything else (including
        # KeyboardInterrupt) is allowed to propagate.
        try:
            node._parent_node.n_leaves = node.n_leaves
        except AttributeError:
            pass #the root
Exemple #55
0
 lines_list = fil.readlines()
 fil.close()
 test = SeqDic(lines_list)  # If this dose not worke we do not have a FASTA file
 ##
 # Makes a newick tree and checks if the referense tree is recovered. The none reducing file.
 ##
 line = (
     "cat /home/4/u1we1f44/Documents/appbio15/project/data/"
     + path
     + ' | fastprot -I fasta -O phylip | fnj -I phylip -O "newick" -o "Treeout.txt"'
 )
 os.system(line)
 TreePath = file_path + "/" + RefTree
 t1 = Tree.get(
     file=open("/home/4/u1we1f44/Documents/appbio15/project/data/" + TreePath, "r"),
     schema="newick",
     tree_offset=0,
 )
 t2 = Tree.get(
     file=open("/home/4/u1we1f44/Documents/appbio15/project/src/Treeout.txt", "r"),
     schema="newick",
     tree_offset=0,
     taxon_namespace=t1.taxon_namespace,
 )
 t1.encode_bipartitions()
 t2.encode_bipartitions()
 if treecompare.symmetric_difference(t1, t2) == 0:
     NotFixedCount += 1
     os.remove("/home/4/u1we1f44/Documents/appbio15/project/src/Treeout.txt")
     Total += 1
 else:
Exemple #56
0
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>.
"""
import argparse

from dendropy import Tree

if __name__ == '__main__':
    # Script entry point: read a tree, rescale all edges so that the largest
    # root-distance reported by calc_node_root_distances() equals
    # --max-height, then write the tree out again.
    tree_formats = ['newick', 'nexus', 'nexml']

    cli = argparse.ArgumentParser(description='Rescale tree height')
    cli.add_argument('--max-height', type=float, metavar='FLOAT', default=0.1,
                     help='Scale longest branch to max height [0.1]')
    cli.add_argument('--if', dest='input_format', default='newick', choices=tree_formats,
                     help='Input tree format [newick]')
    cli.add_argument('--of', dest='output_format', default='newick', choices=tree_formats,
                     help='output tree format [newick]')
    cli.add_argument('input', type=argparse.FileType('r'), default='-',
                     help='Input tree')
    cli.add_argument('output', type=argparse.FileType('w'), default='-',
                     nargs='?', help='Output tree [stdout]')
    opts = cli.parse_args()

    tr = Tree.get(file=opts.input, schema=opts.input_format)
    current_height = max(tr.calc_node_root_distances())
    scale_factor = opts.max_height / current_height
    tr.scale_edges(scale_factor)
    tr.write_to_stream(opts.output, opts.output_format)
def sum_popularity_over_tree(tree, OTT_ptrs=None, exclude=(), pop_store='pop', verbosity=0):
    """Sum popularity indices up and down a phylogenetic tree and return the tree.

    Parameters
    ----------
    tree :
        A dendropy ``Tree``, an open newick file object, or the path of a
        newick file (a path is read with ``Tree.get(path=...)``, a file
        object with ``Tree.get(file=...)``).
    OTT_ptrs : dict or None
        Mapping from integer OTT id to a per-taxon dict. The OTT id is taken
        from the node label (the text after the last ``_ott``). When given,
        a node's own popularity is read from
        ``OTT_ptrs[OTTid]['wd']['final_wiki_item']['pop']`` and the results
        are written back into ``OTT_ptrs[OTTid]`` under the keys
        ``pop_self``, ``pop_ancst`` (summed over ancestors, including the
        node itself), ``pop_dscdt`` (summed over descendants), ``n_ancst``,
        ``n_dscdt``, ``n_pop_ancst`` (number of edges towards the root that
        carry a popularity value) and ``is_seed_plant``. To get a measure of
        both ancestor and descendant popularity, add ``pop_ancst`` and
        ``pop_dscdt``. When None/empty, popularity is read from
        ``node.data['wd']['final_wiki_item']['pop']`` and nothing is written
        back.
    exclude : sequence of str
        Node labels whose own popularity is forced to 0, e.g.
        ``['Dinosauria_ott90215', 'Archosauria_ott335588']`` so that birds
        do not gather popularity intended for dinosaurs.
    pop_store : str
        Intended as the name of the node attribute receiving the popularity.
        NOTE(review): the value is currently not used; popularity is always
        stored in the attribute literally named ``pop_store``.
    verbosity : int
        Non-zero prints memory usage to stderr after the tree is available.

    Returns
    -------
    The tree, with ``pop_store``, ``descendants_popsum``, ``n_descendants``,
    ``ancestors_popsum``, ``n_ancestors``, ``n_pop_ancestors`` and
    ``seedplant`` set on every node.
    """
    from dendropy import Tree

    if not isinstance(tree, Tree):
        read_kwargs = dict(schema='newick', suppress_edge_lengths=True,
                           preserve_underscores=True, suppress_leaf_node_taxa=True)
        if isinstance(tree, str):
            # A file name: dendropy takes paths via path=, not file=.
            tree = Tree.get(path=tree, **read_kwargs)
        else:
            tree = Tree.get(file=tree, **read_kwargs)

    if verbosity:
        print(" Tree read for phylogenetic popularity calc: mem usage {:.1f} Mb".format(memory_usage_resource()), file=sys.stderr)

    # Each node's own popularity; 0 when excluded or when no value can be found.
    for node in tree.preorder_node_iter():
        if node.label in exclude:
            node.pop_store = 0
        else:
            try:
                if OTT_ptrs:
                    ott_id = int(node.label.rsplit("_ott", 1)[1])
                    node.pop_store = float(OTT_ptrs[ott_id]['wd']['final_wiki_item']['pop'])
                else:
                    node.pop_store = node.data['wd']['final_wiki_item']['pop']
                node.has_pop = True
            except (LookupError, AttributeError, ValueError):
                node.pop_store = 0
                node.has_pop = False

    # Go up the tree from the tips, summing the popularity indices beneath
    # each node and counting its descendants.
    for node in tree.postorder_node_iter():
        if node.is_leaf():
            node.descendants_popsum = 0
            node.n_descendants = 0
        try:
            node._parent_node.n_descendants += (1 + node.n_descendants)
            node._parent_node.descendants_popsum += (node.pop_store + node.descendants_popsum)
        except AttributeError:  # could be the first time we have checked the parent
            try:
                node._parent_node.n_descendants = (1 + node.n_descendants)
                node._parent_node.descendants_popsum = (node.pop_store + node.descendants_popsum)
            except AttributeError:
                # This is the root (node._parent_node is None): nothing to push up.
                pass

    # Go down the tree from the root, summing the popularity indices above
    # each node and counting its ancestors.
    for node in tree.preorder_node_iter():
        if node.parent_node is None:
            # This is the root.
            node.seedplant = False
            node.n_ancestors = 0
            node.n_pop_ancestors = 0
            node.ancestors_popsum = 0.0
        else:
            parent = node._parent_node
            node.n_ancestors = parent.n_ancestors + 1
            node.ancestors_popsum = parent.ancestors_popsum + node.pop_store
            if getattr(node, 'has_pop', None):
                node.n_pop_ancestors = parent.n_pop_ancestors + 1
            else:
                node.n_pop_ancestors = parent.n_pop_ancestors
            # NOTE(review): compares against the bare name, while labels used
            # for lookups above carry an '_ott<id>' suffix - confirm that
            # 'Spermatophyta' can actually match.
            if node.label and node.label == 'Spermatophyta':
                node.seedplant = True
                print("Found plant root", file=sys.stderr)
            else:
                node.seedplant = parent.seedplant

    # Place these values into the OTT_ptrs structure.
    if OTT_ptrs:
        for node in tree.preorder_node_iter():
            try:
                # ValueError (non-numeric text after '_ott') is skipped here
                # just as it is when the popularity is read above.
                entry = OTT_ptrs[int(node.label.rsplit("_ott", 1)[1])]
                entry['pop_self'] = node.pop_store
                entry['pop_ancst'] = node.ancestors_popsum  # nb, this includes popularity of self
                entry['pop_dscdt'] = node.descendants_popsum
                entry['n_ancst'] = node.n_ancestors
                entry['n_dscdt'] = node.n_descendants
                entry['n_pop_ancst'] = node.n_pop_ancestors
                entry['is_seed_plant'] = node.seedplant
            except (LookupError, AttributeError, ValueError):
                pass
    return tree
 context_name = "All life"
 with open(f,'r', encoding='utf-8') as treefile:
     treestr = treefile.read()
     treestart = treestr.find(']')
     if treestart == -1:
         treestart = 0
     treestart = treestr.find('(',treestart)
     if treestart == -1:
         print("No tree in file {}".format(f), file=sys.stderr)
         continue
     startstr = treestr[:treestart]
     m = context_re.search(startstr)
     if m:
         context_name = m.group(1)
     try:
         tree = Tree.get(data=treestr[treestart:], schema="newick", suppress_leaf_node_taxa=True, terminating_semicolon_required=False, preserve_underscores=True, rooting='default-rooted')
     except:
         print("WARNING: error reading tree '{}'".format(f))
         raise
     #check for polytomies
     for nd in tree.postorder_internal_node_iter():
         if len(nd._child_nodes) != 2:
             print("WARNING: in {} there is a branch ({}) with {} child nodes: this will be removed by OneZoom".format(f, nd.label or "<unnamed>", len(nd._child_nodes)), file=sys.stderr)
             
     #These are cases where v5 of the OpenTree incorrectly gives them the same number as another species
     OTT_wrong_synonyms =['Geochelone_nigra_ephippium', 'Geochelone_nigra_guntheri','Geochelone_nigra_vandenburghi', 'Geochelone_nigra_microphyes', 'Pachyptila_crassirostris', 'Ducula_spilorrhoa','Ducula_luctuosa', 'Ducula_subflavescens', 'Lophura_hoogerwerfi', 'Acomys_airensis', 'Alouatta_nigerrima', 'Myotis_occultus']
     #these are cases where OneZoom probably has an incorrect species (OpenTree has them as a synonym of something else) but I can't be bothered to correct the OZ tree
     OZ_spurious_spp = ['Cyclemys_orbiculata','Cyclemys_ovata']
     ignore = OTT_wrong_synonyms + OZ_spurious_spp
             
     if args.leavesonly: