import random
from ete_dev import Tree

# Tutorial script: attach custom features (annotations) to tree nodes and
# then query the tree through those features.

# Create a tree from a newick string that includes branch lengths.
t = Tree( '((H:0.3,I:0.1):0.5, A:1, (B:0.4,(C:0.5,(J:1.3, (F:1.2, D:0.1):0.5):0.5):0.5):0.5);' )
print t

# Locate some nodes: first with the get_common_ancestor method ...
ancestor = t.get_common_ancestor("J", "F", "C")
# ... then with the search_nodes method (only the first match is kept) ...
A = t.search_nodes(name="A")[0]
# ... and finally with the "&" shortcut for finding a node by name.
C = t & "C"
H = t & "H"
I = t & "I"

# Add some custom features to the nodes. add_features can be used to add
# many features in a single call.
C.add_features(vowel=False, confidence=1.0)
A.add_features(vowel=True, confidence=0.5)
ancestor.add_features(nodetype="internal")
# The same thing using the one-liner notation.
(t & "H").add_features(vowel=False, confidence=0.2)

# The annotation can be automated (note that this overwrites the values
# set above).
# NOTE(review): the loop variable is called "leaf", but traverse()
# presumably yields internal nodes as well -- confirm against the ete_dev
# API. The condition is a substring test against "AEIOU", so it only equals
# "is a vowel" for single-character names such as the ones used here.
for leaf in t.traverse():
    if leaf.name in "AEIOU":
        leaf.add_features(vowel=True, confidence=random.random())
    else:
        leaf.add_features(vowel=False, confidence=random.random())

# Now use this information to analyze the tree.
print "This tree has", len(t.search_nodes(vowel=True)), "vowel nodes"
def get_rooting(tol, seed_species, agename=False):
    '''
    Return the "age" of every species of a tree of life (TOL) relative to a seed.

    Starting at the seed species, the tree is climbed towards the root. Each
    ancestor visited on the way gets the next age (1 for the parent of the
    seed, 2 for its grandparent, and so on) and every leaf found under that
    ancestor that has not been dated yet receives that age.

    **Example:**
    ::

      tol = "((((((((Drosophila melanogaster,(Drosophila simulans,Drosophila secchellia)),(Drosophila yakuba,Drosophila erecta))[&&NHX:name=melanogaster subgroup],Drosophila ananassae)[&&NHX:name=melanogaster group],(Drosophila pseudoobscura,Drosophila persimilis)[&&NHX:name=obscura group])[&&NHX:name=Sophophora Old World],Drosophila willistoni)[&&NHX:name=subgenus Sophophora],(Drosophila grimshawi,(Drosophila virilis,Drosophila mojavensis))[&&NHX:name=subgenus Drosophila])[&&NHX:name=genus Drosophila],(Anopheles gambiae,Aedes aegypti)[&&NHX:name=Culicidae])[&&NHX:name=Arthropoda],Caenorhabditis elegans)[&&NHX:name=Animalia];"
      seed = "Drosophila melanogaster"
      ROOTING, age2name = get_rooting (tol, seed, True)

      ROOTING == {"Aedes aegypti"           : 7,
                  "Anopheles gambiae"       : 7,
                  "Caenorhabditis elegans"  : 8,
                  "Drosophila ananassae"    : 3,
                  "Drosophila erecta"       : 2,
                  "Drosophila grimshawi"    : 6,
                  "Drosophila melanogaster" : 1,
                  "Drosophila mojavensis"   : 6,
                  "Drosophila persimilis"   : 4,
                  "Drosophila pseudoobscura": 4,
                  "Drosophila secchellia"   : 1,
                  "Drosophila simulans"     : 1,
                  "Drosophila virilis"      : 6,
                  "Drosophila willistoni"   : 5,
                  "Drosophila yakuba"       : 2}

      age2name == {1: "Drosophila melanogaster.Drosophila simulans.Drosophila secchellia",
                   2: "melanogaster subgroup",
                   3: "melanogaster group",
                   4: "Sophophora Old World",
                   5: "subgenus Sophophora",
                   6: "genus Drosophila",
                   7: "Arthropoda",
                   8: "Animalia"}

    :argument tol: tree description handed as is to ``Tree`` (a newick string in the example above)
    :argument seed_species: species name
    :argument False agename: if True, also returns the inverse dictionary

    :returns: ROOTING dictionary with age of each species. When agename is
      true, a ``(ROOTING, age2name)`` tuple where age2name maps each age to
      the name of the ancestor it stands for (the leaf names of that ancestor
      joined with "." when the ancestor is called 'NoName')

    :raises SystemExit: when no node named seed_species exists in the tree
    '''
    tol = Tree(tol)
    try:
        node = tol.search_nodes(name=seed_species)[0]
    except IndexError:
        # Raised directly instead of calling exit(): that helper is injected
        # by the ``site`` module for interactive use and may be missing.
        # The exception type and message seen by callers are unchanged.
        raise SystemExit('ERROR: Seed species not found in tree\n')

    age = 1
    ROOTING = {}
    age2name = {}
    while not node.is_root():
        node = node.up
        leaf_names = node.get_leaf_names()
        if agename and leaf_names:
            # One label per ancestor is enough; it does not depend on the leaf.
            if node.name == 'NoName':
                nam = '.'.join(leaf_names)
            else:
                nam = node.name
            age2name.setdefault(age, nam)
        for leaf in leaf_names:
            # setdefault keeps the youngest (first assigned) age of a leaf.
            ROOTING.setdefault(leaf, age)
        age += 1

    if agename:
        return ROOTING, age2name
    return ROOTING
# /A-------| \G-------| # | | \-I # | | # | \-E #-NoName--| # | /-L # | /J-------| # | | | /-N # | | \O-------| # \C-------| \-Q # | # | /-P # \M-------| # \-S # Get pointers to specific nodes G = t.search_nodes(name="G")[0] J = t.search_nodes(name="J")[0] C = t.search_nodes(name="C")[0] # If we remove J from the tree, the whole partition under J node will # be detached from the tree and it will be considered an independent # tree. We can do the same thing using two approaches: J.detach() or # C.remove_child(J) removed_node = J.detach() # = C.remove_child(J) # if we know print the original tree, we will see how J partition is # no longer there. print "Tree after REMOVING the node J" print t.get_ascii(show_internal=True) # /-H # /D-------| # | \-K # /B-------|
def _build_compare_parser():
    """Build the command line parser used by main()."""
    parser = argparse.ArgumentParser(
        description=__DESCRIPTION__,
        formatter_class=argparse.RawDescriptionHelpFormatter)

    input_args = parser.add_argument_group("INPUT OPTIONS")
    input_args.add_argument(
        "source_trees", metavar='source_trees', type=str, nargs="*",
        help='a list of source tree files')
    input_args.add_argument(
        "--source_file", dest="source_file", type=str,
        help="""path to a file containing many source trees, one per line""")
    input_args.add_argument(
        "-r", dest="reftree", type=str, required=True,
        help="""Reference tree""")
    input_args.add_argument(
        "--ref_tree_attr", dest="ref_tree_attr", type=str, default="name",
        help=("attribute in ref tree used as leaf name"))
    input_args.add_argument(
        "--src_tree_attr", dest="src_tree_attr", type=str, default="name",
        help=("attribute in source tree used as leaf name"))
    input_args.add_argument(
        "--min_support_ref", type=float, default=0.0,
        help=("min support for branches to be considered from the ref tree"))
    input_args.add_argument(
        "--min_support_src", type=float, default=0.0,
        help=("min support for branches to be considered from the source tree"))

    output_args = parser.add_argument_group("OUTPUT OPTIONS")
    output_args.add_argument(
        "-o", dest="output", type=str,
        help="""Path to the tab delimited report file""")

    opt_args = parser.add_argument_group("DISTANCE OPTIONS")
    opt_args.add_argument(
        "--outgroup", dest="outgroup", nargs="+",
        help="""outgroup used to root reference and source trees before distance computation""")
    opt_args.add_argument(
        "--expand_polytomies", dest="polytomies", action="store_true",
        help="""expand politomies if necessary""")
    opt_args.add_argument(
        "--unrooted", dest="unrooted", action="store_true",
        help="""compare trees as unrooted""")
    opt_args.add_argument(
        "--min_support", dest="min_support", type=float, default=0.0,
        help=("min support value for branches to be counted in the distance computation (RF, treeko and refTree/targeGene compatibility)"))

    opt_args = parser.add_argument_group("PHYLOGENETICS OPTIONS")
    opt_args.add_argument(
        "--extract_species", action="store_true",
        help="When used, leaf names in the reference and source trees are assumed to represent species."
        " If target trees are gene-trees whose species information is encoded as a part of the leaf sequence name,"
        " it can be automatically extracted by providing a Perl regular expression that extract a "
        " valid species code (see --sp_regexp). Such information will be also used to detect duplication"
        " events. ")
    opt_args.add_argument(
        "--sp_regexp", type=str,
        help=("Specifies a Perl regular expression to automatically extract species names"
              " from the name string in source trees. If not used, leaf names are assumed to represent species names."
              " Example: use this expression '[^_]+_(.+)' to extract HUMAN from the string 'P53_HUMAN'."))
    opt_args.add_argument("--collateral", action='store_true', help=(""))
    return parser


def _root_on_outgroup(tree, outgroup):
    """Root tree on the single named outgroup node, or on the common ancestor of several."""
    if len(outgroup) > 1:
        out = tree.get_common_ancestor(outgroup)
    else:
        out = tree.search_nodes(name=outgroup[0])[0]
    tree.set_outgroup(out)


def main(argv):
    """
    Compare every source tree against a reference tree and report the results.

    The reference tree (``-r``) is loaded once, optionally renamed through
    ``--ref_tree_attr`` and rooted on ``--outgroup``. Each source tree gets
    the same treatment and is then compared with ``compare()``. One result
    row is produced per source tree: written tab delimited to the ``-o``
    file when that option is given, printed as a fixed width table on
    standard output otherwise.

    The options ``--expand_polytomies``, ``--min_support`` and
    ``--collateral`` are parsed but not read by this function.

    :argument argv: list of command line arguments handed to ``parse_args``
      (program name not included)

    :raises SystemExit: with status 1 when both ``--source_file`` and
      positional source trees are supplied (argparse also exits on invalid
      arguments)
    """
    # write() is used for all output instead of the print statement so that
    # this function parses under both Python 2 and Python 3.
    args = _build_compare_parser().parse_args(argv)
    sys.stdout.write(__DESCRIPTION__ + '\n')

    if args.source_file and args.source_trees:
        sys.stderr.write('The use of --source_file and source_trees at the '
                         'same time is not supported.\n')
        sys.exit(1)

    if args.source_file:
        source_trees = tree_iterator(args.source_file)
    else:
        source_trees = args.source_trees

    ref_tree = Tree(args.reftree)
    if args.ref_tree_attr:
        for lf in ref_tree.iter_leaves():
            lf._origname = lf.name
            if args.ref_tree_attr not in lf.features:
                # Show the leaf lacking the feature; getattr() below may
                # raise AttributeError for it.
                sys.stdout.write(str(lf) + '\n')
            lf.name = getattr(lf, args.ref_tree_attr)

    if args.outgroup:
        _root_on_outgroup(ref_tree, args.outgroup)

    HEADER = ("source tree", 'ref tree', 'common\ntips', 'normRF', 'RF',
              'maxRF', "%reftree", "%genetree", "subtrees", "treeko\ndist")
    # Defined unconditionally: previously it only existed when no report
    # file was requested, yet the per-tree loop always needed it.
    COL_WIDTHS = [20, 20] + [9] * 10

    # The species naming function does not depend on the source tree, so
    # the regular expression is compiled once, outside the loop.
    get_sp_name = None
    if args.extract_species:
        if args.sp_regexp:
            SPMATCHER = re.compile(args.sp_regexp)

            def get_sp_name(x):
                return SPMATCHER.search(x).groups()[0]
        else:
            def get_sp_name(x):
                return x

    ref_fname = os.path.basename(args.reftree)
    OUT = open(args.output, "w") if args.output else None
    try:
        if OUT is not None:
            OUT.write('# ' + ctime() + '\n')
            OUT.write('# ' + ' '.join(sys.argv) + '\n')
            # Column titles contain line breaks meant for the wrapped table;
            # they are flattened to keep the file header on a single line.
            OUT.write('#' + '\t'.join(h.replace('\n', ' ') for h in HEADER) + '\n')
        else:
            sys.stdout.write('# ' + ctime() + '\n')
            sys.stdout.write('# ' + ' '.join(sys.argv) + '\n')
            print_table([HEADER], fix_col_width=COL_WIDTHS, wrap_style='wrap')

        for counter, tfile in enumerate(source_trees):
            if args.source_file:
                # tree_iterator items are unpacked as (seed id, tree); the
                # seed id is not used here.
                _seedid, tfile = tfile

            if args.extract_species:
                tt = PhyloTree(tfile, sp_naming_function=get_sp_name)
            else:
                tt = Tree(tfile)

            if args.src_tree_attr:
                for lf in tt.iter_leaves():
                    lf._origname = lf.name
                    lf.name = getattr(lf, args.src_tree_attr)

            if args.outgroup:
                _root_on_outgroup(tt, args.outgroup)

            if args.source_trees:
                fname = os.path.basename(tfile)
            else:
                fname = '%05d' % counter

            r = tt.compare(ref_tree,
                           ref_tree_attr=args.ref_tree_attr,
                           source_tree_attr=args.src_tree_attr,
                           min_support_ref=args.min_support_ref,
                           min_support_source=args.min_support_src,
                           unrooted=args.unrooted,
                           has_duplications=args.extract_species)

            stats = [r['effective_tree_size'], r['norm_rf'], r['rf'],
                     r['max_rf'], r["source_edges_in_ref"],
                     r["ref_edges_in_source"], r['source_subtrees'],
                     r['treeko_dist']]
            if OUT is not None:
                # Full names in the report file; truncation is only needed
                # to fit the fixed width table.
                cells = [fname, ref_fname] + stats
                OUT.write('\t'.join(str(istr(c)) for c in cells) + '\n')
            else:
                cells = [fname[-30:], ref_fname[-30:]] + stats
                print_table([[istr(c) for c in cells]],
                            fix_col_width=COL_WIDTHS, wrap_style='cut')
    finally:
        # Close the report even when a tree fails to load or compare.
        if OUT is not None:
            OUT.close()
import random
from ete_dev import Tree

# Tutorial script: attach custom features (annotations) to tree nodes and
# then query the tree through those features.

# Create a tree from a newick string that includes branch lengths.
t = Tree( '((H:0.3,I:0.1):0.5, A:1, (B:0.4,(C:0.5,(J:1.3, (F:1.2, D:0.1):0.5):0.5):0.5):0.5);' )
print t

# Locate some nodes: first with the get_common_ancestor method ...
ancestor=t.get_common_ancestor("J", "F", "C")
# ... then with the search_nodes method (only the first match is kept) ...
A = t.search_nodes(name="A")[0]
# ... and finally with the "&" shortcut for finding a node by name.
C= t&"C"
H= t&"H"
I= t&"I"

# Add some custom features to the nodes. add_features can be used to add
# many features in a single call.
C.add_features(vowel=False, confidence=1.0)
A.add_features(vowel=True, confidence=0.5)
ancestor.add_features(nodetype="internal")
# The same thing using the one-liner notation.
(t&"H").add_features(vowel=False, confidence=0.2)

# The annotation can be automated (note that this overwrites the values
# set above).
# NOTE(review): the loop variable is called "leaf", but traverse()
# presumably yields internal nodes as well -- confirm against the ete_dev
# API. The condition is a substring test against "AEIOU", so it only equals
# "is a vowel" for single-character names such as the ones used here.
for leaf in t.traverse():
    if leaf.name in "AEIOU":
        leaf.add_features(vowel=True, confidence=random.random())
    else:
        leaf.add_features(vowel=False, confidence=random.random())

# Now use this information to analyze the tree.
print "This tree has", len(t.search_nodes(vowel=True)), "vowel nodes"
# Features become plain node attributes, so they can be used in filters.
print "Which are", [leaf.name for leaf in t.iter_leaves() if leaf.vowel==True]
# But features may refer to any kind of data, not only simple
# ---------| # | /-F # \--------| # \-G print "Tree3:", t3 # /-H # | # ---------| /-I # | /--------| # | | \-J # \--------| # | /-K # \--------| # \-L # Locates a terminal node in the first tree A = t1.search_nodes(name='A')[0] # and adds the two other trees as children. A.add_child(t2) A.add_child(t3) print "Resulting concatenated tree:", t1 # /-D # /--------| # | \-E # /--------| # | | /-F # | \--------| # /--------| \-G # | | # | | /-H # | | | # | \--------| /-I
from ete_dev import Tree

# Tutorial script: walk from a leaf up to the root through the "up" pointer,
# printing the subtree hanging from every node visited.
t = Tree( '(A:1,(B:1,(C:1,D:1):0.5):0.5);' )

# Browse the tree from a specific leaf to the root
node = t.search_nodes(name="C")[0]
# The loop stops once "up" yields a false value; presumably the root's
# "up" is None -- confirm against the ete_dev API.
while node:
    print node
    node = node.up
# --C
#            /-C
#  ---------|
#            \-D
#
#            /-B
#  ---------|
#           |          /-C
#            \--------|
#                      \-D
#
#            /-A
#  ---------|
#           |          /-B
#            \--------|
#                     |          /-C
#                      \--------|
#                                \-D
def _build_compare_parser():
    """Build the command line parser used by main()."""
    parser = argparse.ArgumentParser(
        description=__DESCRIPTION__,
        formatter_class=argparse.RawDescriptionHelpFormatter)

    input_args = parser.add_argument_group("INPUT OPTIONS")
    input_args.add_argument(
        "source_trees", metavar='source_trees', type=str, nargs="*",
        help='a list of source tree files')
    input_args.add_argument(
        "--source_file", dest="source_file", type=str,
        help="""path to a file containing many source trees, one per line""")
    input_args.add_argument(
        "-r", dest="reftree", type=str, required=True,
        help="""Reference tree""")
    input_args.add_argument(
        "--ref_tree_attr", dest="ref_tree_attr", type=str, default="name",
        help=("attribute in ref tree used as leaf name"))
    input_args.add_argument(
        "--src_tree_attr", dest="src_tree_attr", type=str, default="name",
        help=("attribute in source tree used as leaf name"))
    input_args.add_argument(
        "--min_support_ref", type=float, default=0.0,
        help=("min support for branches to be considered from the ref tree"))
    input_args.add_argument(
        "--min_support_src", type=float, default=0.0,
        help=("min support for branches to be considered from the source tree"))

    output_args = parser.add_argument_group("OUTPUT OPTIONS")
    output_args.add_argument(
        "-o", dest="output", type=str,
        help="""Path to the tab delimited report file""")

    opt_args = parser.add_argument_group("DISTANCE OPTIONS")
    opt_args.add_argument(
        "--outgroup", dest="outgroup", nargs="+",
        help="""outgroup used to root reference and source trees before distance computation""")
    opt_args.add_argument(
        "--expand_polytomies", dest="polytomies", action="store_true",
        help="""expand politomies if necessary""")
    opt_args.add_argument(
        "--unrooted", dest="unrooted", action="store_true",
        help="""compare trees as unrooted""")
    opt_args.add_argument(
        "--min_support", dest="min_support", type=float, default=0.0,
        help=("min support value for branches to be counted in the distance computation (RF, treeko and refTree/targeGene compatibility)"))

    opt_args = parser.add_argument_group("PHYLOGENETICS OPTIONS")
    opt_args.add_argument(
        "--extract_species", action="store_true",
        help="When used, leaf names in the reference and source trees are assumed to represent species."
        " If target trees are gene-trees whose species information is encoded as a part of the leaf sequence name,"
        " it can be automatically extracted by providing a Perl regular expression that extract a "
        " valid species code (see --sp_regexp). Such information will be also used to detect duplication"
        " events. ")
    opt_args.add_argument(
        "--sp_regexp", type=str,
        help=("Specifies a Perl regular expression to automatically extract species names"
              " from the name string in source trees. If not used, leaf names are assumed to represent species names."
              " Example: use this expression '[^_]+_(.+)' to extract HUMAN from the string 'P53_HUMAN'."))
    opt_args.add_argument("--collateral", action='store_true', help=(""))
    return parser


def _root_on_outgroup(tree, outgroup):
    """Root tree on the single named outgroup node, or on the common ancestor of several."""
    if len(outgroup) > 1:
        out = tree.get_common_ancestor(outgroup)
    else:
        out = tree.search_nodes(name=outgroup[0])[0]
    tree.set_outgroup(out)


def main(argv):
    """
    Compare every source tree against a reference tree and report the results.

    The reference tree (``-r``) is loaded once, optionally renamed through
    ``--ref_tree_attr`` and rooted on ``--outgroup``. Each source tree gets
    the same treatment and is then compared with ``compare()``. One result
    row is produced per source tree: written tab delimited to the ``-o``
    file when that option is given, printed as a fixed width table on
    standard output otherwise.

    The options ``--expand_polytomies``, ``--min_support`` and
    ``--collateral`` are parsed but not read by this function.

    :argument argv: list of command line arguments handed to ``parse_args``
      (program name not included)

    :raises SystemExit: with status 1 when both ``--source_file`` and
      positional source trees are supplied (argparse also exits on invalid
      arguments)
    """
    # write() is used for all output instead of the print statement so that
    # this function parses under both Python 2 and Python 3.
    args = _build_compare_parser().parse_args(argv)
    sys.stdout.write(__DESCRIPTION__ + '\n')

    if args.source_file and args.source_trees:
        sys.stderr.write('The use of --source_file and source_trees at the '
                         'same time is not supported.\n')
        sys.exit(1)

    if args.source_file:
        source_trees = tree_iterator(args.source_file)
    else:
        source_trees = args.source_trees

    ref_tree = Tree(args.reftree)
    if args.ref_tree_attr:
        for lf in ref_tree.iter_leaves():
            lf._origname = lf.name
            if args.ref_tree_attr not in lf.features:
                # Show the leaf lacking the feature; getattr() below may
                # raise AttributeError for it.
                sys.stdout.write(str(lf) + '\n')
            lf.name = getattr(lf, args.ref_tree_attr)

    if args.outgroup:
        _root_on_outgroup(ref_tree, args.outgroup)

    HEADER = ("source tree", 'ref tree', 'common\ntips', 'normRF', 'RF',
              'maxRF', "%reftree", "%genetree", "subtrees", "treeko\ndist")
    # Defined unconditionally: previously it only existed when no report
    # file was requested, yet the per-tree loop always needed it.
    COL_WIDTHS = [20, 20] + [9] * 10

    # The species naming function does not depend on the source tree, so
    # the regular expression is compiled once, outside the loop.
    get_sp_name = None
    if args.extract_species:
        if args.sp_regexp:
            SPMATCHER = re.compile(args.sp_regexp)

            def get_sp_name(x):
                return SPMATCHER.search(x).groups()[0]
        else:
            def get_sp_name(x):
                return x

    ref_fname = os.path.basename(args.reftree)
    OUT = open(args.output, "w") if args.output else None
    try:
        if OUT is not None:
            OUT.write('# ' + ctime() + '\n')
            OUT.write('# ' + ' '.join(sys.argv) + '\n')
            # Column titles contain line breaks meant for the wrapped table;
            # they are flattened to keep the file header on a single line.
            OUT.write('#' + '\t'.join(h.replace('\n', ' ') for h in HEADER) + '\n')
        else:
            sys.stdout.write('# ' + ctime() + '\n')
            sys.stdout.write('# ' + ' '.join(sys.argv) + '\n')
            print_table([HEADER], fix_col_width=COL_WIDTHS, wrap_style='wrap')

        for counter, tfile in enumerate(source_trees):
            if args.source_file:
                # tree_iterator items are unpacked as (seed id, tree); the
                # seed id is not used here.
                _seedid, tfile = tfile

            if args.extract_species:
                tt = PhyloTree(tfile, sp_naming_function=get_sp_name)
            else:
                tt = Tree(tfile)

            if args.src_tree_attr:
                for lf in tt.iter_leaves():
                    lf._origname = lf.name
                    lf.name = getattr(lf, args.src_tree_attr)

            if args.outgroup:
                _root_on_outgroup(tt, args.outgroup)

            if args.source_trees:
                fname = os.path.basename(tfile)
            else:
                fname = '%05d' % counter

            r = tt.compare(ref_tree,
                           ref_tree_attr=args.ref_tree_attr,
                           source_tree_attr=args.src_tree_attr,
                           min_support_ref=args.min_support_ref,
                           min_support_source=args.min_support_src,
                           unrooted=args.unrooted,
                           has_duplications=args.extract_species)

            stats = [r['effective_tree_size'], r['norm_rf'], r['rf'],
                     r['max_rf'], r["source_edges_in_ref"],
                     r["ref_edges_in_source"], r['source_subtrees'],
                     r['treeko_dist']]
            if OUT is not None:
                # Full names in the report file; truncation is only needed
                # to fit the fixed width table.
                cells = [fname, ref_fname] + stats
                OUT.write('\t'.join(str(istr(c)) for c in cells) + '\n')
            else:
                cells = [fname[-30:], ref_fname[-30:]] + stats
                print_table([[istr(c) for c in cells]],
                            fix_col_width=COL_WIDTHS, wrap_style='cut')
    finally:
        # Close the report even when a tree fails to load or compare.
        if OUT is not None:
            OUT.close()
from ete_dev import Tree

# Tutorial script: walk from a leaf up to the root through the "up" pointer,
# printing the subtree hanging from every node visited.
t = Tree('(A:1,(B:1,(C:1,D:1):0.5):0.5);')

# Browse the tree from a specific leaf to the root
node = t.search_nodes(name="C")[0]
# The loop stops once "up" yields a false value; presumably the root's
# "up" is None -- confirm against the ete_dev API.
while node:
    print node
    node = node.up
# --C
#            /-C
#  ---------|
#            \-D
#
#            /-B
#  ---------|
#           |          /-C
#            \--------|
#                      \-D
#
#            /-A
#  ---------|
#           |          /-B
#            \--------|
#                     |          /-C
#                      \--------|
#                                \-D
from ete_dev import Tree #Loads a tree t = Tree( '((H:1,I:1):0.5, A:1, (B:1,(C:1,D:1):0.5):0.5);' ) print t # /-H # /--------| # | \-I # | #---------|--A # | # | /-B # \--------| # | /-C # \--------| # \-D # I get D D = t.search_nodes(name="D") # I get all nodes with distance=0.5 nodes = t.search_nodes(dist=0.5) print len(nodes), "nodes have distance=0.5"
from ete_dev import Tree #Loads a tree t = Tree('((H:1,I:1):0.5, A:1, (B:1,(C:1,D:1):0.5):0.5);') print t # /-H # /--------| # | \-I # | #---------|--A # | # | /-B # \--------| # | /-C # \--------| # \-D # I get D D = t.search_nodes(name="D") # I get all nodes with distance=0.5 nodes = t.search_nodes(dist=0.5) print len(nodes), "nodes have distance=0.5"
def get_rooting(tol, seed_species, agename=False):
    '''
    Return the "age" of every species of a tree of life (TOL) relative to a seed.

    Starting at the seed species, the tree is climbed towards the root. Each
    ancestor visited on the way gets the next age (1 for the parent of the
    seed, 2 for its grandparent, and so on) and every leaf found under that
    ancestor that has not been dated yet receives that age.

    **Example:**
    ::

      tol = "((((((((Drosophila melanogaster,(Drosophila simulans,Drosophila secchellia)),(Drosophila yakuba,Drosophila erecta))[&&NHX:name=melanogaster subgroup],Drosophila ananassae)[&&NHX:name=melanogaster group],(Drosophila pseudoobscura,Drosophila persimilis)[&&NHX:name=obscura group])[&&NHX:name=Sophophora Old World],Drosophila willistoni)[&&NHX:name=subgenus Sophophora],(Drosophila grimshawi,(Drosophila virilis,Drosophila mojavensis))[&&NHX:name=subgenus Drosophila])[&&NHX:name=genus Drosophila],(Anopheles gambiae,Aedes aegypti)[&&NHX:name=Culicidae])[&&NHX:name=Arthropoda],Caenorhabditis elegans)[&&NHX:name=Animalia];"
      seed = "Drosophila melanogaster"
      ROOTING, age2name = get_rooting (tol, seed, True)

      ROOTING == {"Aedes aegypti"           : 7,
                  "Anopheles gambiae"       : 7,
                  "Caenorhabditis elegans"  : 8,
                  "Drosophila ananassae"    : 3,
                  "Drosophila erecta"       : 2,
                  "Drosophila grimshawi"    : 6,
                  "Drosophila melanogaster" : 1,
                  "Drosophila mojavensis"   : 6,
                  "Drosophila persimilis"   : 4,
                  "Drosophila pseudoobscura": 4,
                  "Drosophila secchellia"   : 1,
                  "Drosophila simulans"     : 1,
                  "Drosophila virilis"      : 6,
                  "Drosophila willistoni"   : 5,
                  "Drosophila yakuba"       : 2}

      age2name == {1: "Drosophila melanogaster.Drosophila simulans.Drosophila secchellia",
                   2: "melanogaster subgroup",
                   3: "melanogaster group",
                   4: "Sophophora Old World",
                   5: "subgenus Sophophora",
                   6: "genus Drosophila",
                   7: "Arthropoda",
                   8: "Animalia"}

    :argument tol: tree description handed as is to ``Tree`` (a newick string in the example above)
    :argument seed_species: species name
    :argument False agename: if True, also returns the inverse dictionary

    :returns: ROOTING dictionary with age of each species. When agename is
      true, a ``(ROOTING, age2name)`` tuple where age2name maps each age to
      the name of the ancestor it stands for (the leaf names of that ancestor
      joined with "." when the ancestor is called 'NoName')

    :raises SystemExit: when no node named seed_species exists in the tree
    '''
    tol = Tree(tol)
    try:
        node = tol.search_nodes(name=seed_species)[0]
    except IndexError:
        # Raised directly instead of calling exit(): that helper is injected
        # by the ``site`` module for interactive use and may be missing.
        # The exception type and message seen by callers are unchanged.
        raise SystemExit('ERROR: Seed species not found in tree\n')

    age = 1
    ROOTING = {}
    age2name = {}
    while not node.is_root():
        node = node.up
        leaf_names = node.get_leaf_names()
        if agename and leaf_names:
            # One label per ancestor is enough; it does not depend on the leaf.
            if node.name == 'NoName':
                nam = '.'.join(leaf_names)
            else:
                nam = node.name
            age2name.setdefault(age, nam)
        for leaf in leaf_names:
            # setdefault keeps the youngest (first assigned) age of a leaf.
            ROOTING.setdefault(leaf, age)
        age += 1

    if agename:
        return ROOTING, age2name
    return ROOTING