Beispiel #1
0
def get_example_tree():

    # sample sequence and a list of example motif types     
    seq = "LHGRISQQVEQSRSQVQAIGEKVSLAQAKIEKIKGSKKAIKVFSSAKYPAPERLQEYGSIFTDAQDPGLQRRPRHRIQSKQRPLDERALQEKLKDFPVCVSTKPEPEDDAEEGLGGLPSNISSVSSLLLFNTTENLYKKYVFLDPLAGAVTKTHVMLGAETEEKLFDAPLSISKREQLEQQVPENYFYVPDLGQVPEIDVPSYLPDLPGIANDLMYIADLGPGIAPSAPGTIPELPTFHTEVAEPLKVGELGSGMGAGPGTPAHTPSSLDTPHFVFQTYKMGAPPLPPSTAAPVGQGARQDDSSSSASPSVQGAPREVVDPSGGWATLLESIRQAGGIGKAKLRSMKERKLEKQQQKEQEQVRATSQGGHLMSDLFNKLVMRRKGISGKGPGAGDGPGGAFARVSDSIPPLPPPQQPQAEDEDDWES"
    motifs = [
        # seq.start, seq.end, shape, width, height, fgcolor, bgcolor
        [10, 100, "[]", None, 10, "black", "rgradient:blue", "arial|8|white|domain Name"],
        [110, 150, "o", None, 10, "blue", "pink", None],
        [155, 180, "()", None, 10, "blue", "rgradient:purple", None],
        [160, 170, "^", None, 14, "black", "yellow", None],
        [172, 180, "v", None, 12, "black", "rgradient:orange", None],
        [185, 190, "o", None, 12, "black", "brown", None],
        [198, 200, "<>", None, 15, "black", "rgradient:gold", None],
        [210, 240, "compactseq", 2, 10, None, None, None],
        [300, 320, "seq", 10, 10, None, None, None],
        [310, 345, "<>", None, 15, "black", "rgradient:black", None],
    ]
    # Create a random tree and add to each leaf a random set of motifs
    # from the original set
    t = Tree()
    t.populate(10)
    for l in t.iter_leaves():
        seq_motifs = [list(m) for m in motifs] #sample(motifs, randint(2, len(motifs))) 

        seqFace = SeqMotifFace(seq, seq_motifs, intermotif_format="line",
                               seqtail_format="compactseq", scale_factor=1)
        seqFace.margin_bottom = 4
        f = l.add_face(seqFace, 0, "aligned")

    return t, TreeStyle()
Beispiel #2
0
def get_example_tree():

    t = Tree()
    t.populate(8)

    # Node style handling is no longer limited to layout functions. You
    # can now create fixed node styles and use them many times, save them
    # or even add them to nodes before drawing (this allows to save and
    # reproduce an tree image design)

    # Set bold red branch to the root node
    style = NodeStyle()
    style["fgcolor"] = "#0f0f0f"
    style["size"] = 0
    style["vt_line_color"] = "#ff0000"
    style["hz_line_color"] = "#ff0000"
    style["vt_line_width"] = 8
    style["hz_line_width"] = 8
    style["vt_line_type"] = 0 # 0 solid, 1 dashed, 2 dotted
    style["hz_line_type"] = 0
    t.set_style(style)

    #Set dotted red lines to the first two branches
    style1 = NodeStyle()
    style1["fgcolor"] = "#0f0f0f"
    style1["size"] = 0
    style1["vt_line_color"] = "#ff0000"
    style1["hz_line_color"] = "#ff0000"
    style1["vt_line_width"] = 2
    style1["hz_line_width"] = 2
    style1["vt_line_type"] = 2 # 0 solid, 1 dashed, 2 dotted
    style1["hz_line_type"] = 2
    t.children[0].img_style = style1
    t.children[1].img_style = style1

    # Set dashed blue lines in all leaves
    style2 = NodeStyle()
    style2["fgcolor"] = "#000000"
    style2["shape"] = "circle"
    style2["vt_line_color"] = "#0000aa"
    style2["hz_line_color"] = "#0000aa"
    style2["vt_line_width"] = 2
    style2["hz_line_width"] = 2
    style2["vt_line_type"] = 1 # 0 solid, 1 dashed, 2 dotted
    style2["hz_line_type"] = 1
    for l in t.iter_leaves():
        l.img_style = style2

    ts = TreeStyle()
    ts.layout_fn = layout
    ts.show_leaf_name = False

    return t, ts
Beispiel #3
0
def get_example_tree():

    t = Tree()
    t.populate(8)

    # Node style handling is no longer limited to layout functions. You
    # can now create fixed node styles and use them many times, save them
    # or even add them to nodes before drawing (this allows to save and
    # reproduce an tree image design)

    # Set bold red branch to the root node
    style = NodeStyle()
    style["fgcolor"] = "#0f0f0f"
    style["size"] = 0
    style["vt_line_color"] = "#ff0000"
    style["hz_line_color"] = "#ff0000"
    style["vt_line_width"] = 8
    style["hz_line_width"] = 8
    style["vt_line_type"] = 0  # 0 solid, 1 dashed, 2 dotted
    style["hz_line_type"] = 0
    t.set_style(style)

    #Set dotted red lines to the first two branches
    style1 = NodeStyle()
    style1["fgcolor"] = "#0f0f0f"
    style1["size"] = 0
    style1["vt_line_color"] = "#ff0000"
    style1["hz_line_color"] = "#ff0000"
    style1["vt_line_width"] = 2
    style1["hz_line_width"] = 2
    style1["vt_line_type"] = 2  # 0 solid, 1 dashed, 2 dotted
    style1["hz_line_type"] = 2
    t.children[0].img_style = style1
    t.children[1].img_style = style1

    # Set dashed blue lines in all leaves
    style2 = NodeStyle()
    style2["fgcolor"] = "#000000"
    style2["shape"] = "circle"
    style2["vt_line_color"] = "#0000aa"
    style2["hz_line_color"] = "#0000aa"
    style2["vt_line_width"] = 2
    style2["hz_line_width"] = 2
    style2["vt_line_type"] = 1  # 0 solid, 1 dashed, 2 dotted
    style2["hz_line_type"] = 1
    for l in t.iter_leaves():
        l.img_style = style2

    ts = TreeStyle()
    ts.layout_fn = layout
    ts.show_leaf_name = False

    return t, ts
Beispiel #4
0
def get_example_tree():
    t = Tree("((a,b),c);")
    
    right_c0_r0 = TextFace("right_col0_row0")
    right_c0_r1 = TextFace("right_col0_row1")
    right_c1_r0 = TextFace("right_col1_row0")
    right_c1_r1 = TextFace("right_col1_row1")
    right_c1_r2 = TextFace("right_col1_row2")

    top_c0_r0 = TextFace("top_col0_row0")
    top_c0_r1 = TextFace("top_col0_row1")

    bottom_c0_r0 = TextFace("bottom_col0_row0")
    bottom_c1_r0 = TextFace("bottom_col1_row0")

    aligned_c0_r0 = TextFace("aligned_col0_row0")
    aligned_c0_r1 = TextFace("aligned_col0_row1")

    aligned_c1_r0 = TextFace("aligned_col1_row0")
    aligned_c1_r1 = TextFace("aligned_col1_row1")


    t.add_face(right_c0_r1, column=1, position="branch-right")
    t.add_face(right_c0_r0, column=0, position="branch-right")

    t.add_face(right_c1_r2, column=2, position="branch-right")
    t.add_face(right_c1_r1, column=1, position="branch-right")
    t.add_face(right_c1_r0, column=0, position="branch-right")

    t.add_face(top_c0_r1, column=1, position="branch-top")
    t.add_face(top_c0_r0, column=0, position="branch-top")

    t.add_face(bottom_c0_r0, column=0, position="branch-bottom")
    t.add_face(bottom_c1_r0, column=1, position="branch-bottom")

    for leaf in t.iter_leaves():
        leaf.add_face(aligned_c0_r1, 0, "aligned")
        leaf.add_face(aligned_c0_r0, 0, "aligned")
        leaf.add_face(aligned_c1_r1, 0, "aligned")
        leaf.add_face(aligned_c1_r0, 0, "aligned")
        
    return t, TreeStyle()
Beispiel #5
0
def get_example_tree():
    t = Tree("((a,b),c);")

    right_c0_r0 = TextFace("right_col0_row0")
    right_c0_r1 = TextFace("right_col0_row1")
    right_c1_r0 = TextFace("right_col1_row0")
    right_c1_r1 = TextFace("right_col1_row1")
    right_c1_r2 = TextFace("right_col1_row2")

    top_c0_r0 = TextFace("top_col0_row0")
    top_c0_r1 = TextFace("top_col0_row1")

    bottom_c0_r0 = TextFace("bottom_col0_row0")
    bottom_c1_r0 = TextFace("bottom_col1_row0")

    aligned_c0_r0 = TextFace("aligned_col0_row0")
    aligned_c0_r1 = TextFace("aligned_col0_row1")

    aligned_c1_r0 = TextFace("aligned_col1_row0")
    aligned_c1_r1 = TextFace("aligned_col1_row1")

    t.add_face(right_c0_r1, column=1, position="branch-right")
    t.add_face(right_c0_r0, column=0, position="branch-right")

    t.add_face(right_c1_r2, column=2, position="branch-right")
    t.add_face(right_c1_r1, column=1, position="branch-right")
    t.add_face(right_c1_r0, column=0, position="branch-right")

    t.add_face(top_c0_r1, column=1, position="branch-top")
    t.add_face(top_c0_r0, column=0, position="branch-top")

    t.add_face(bottom_c0_r0, column=0, position="branch-bottom")
    t.add_face(bottom_c1_r0, column=1, position="branch-bottom")

    for leaf in t.iter_leaves():
        leaf.add_face(aligned_c0_r1, 0, "aligned")
        leaf.add_face(aligned_c0_r0, 0, "aligned")
        leaf.add_face(aligned_c1_r1, 0, "aligned")
        leaf.add_face(aligned_c1_r0, 0, "aligned")

    return t, TreeStyle()
Beispiel #6
0
def get_example_tree():

    # sample sequence and a list of example motif types
    seq = "LHGRISQQVEQSRSQVQAIGEKVSLAQAKIEKIKGSKKAIKVFSSAKYPAPERLQEYGSIFTDAQDPGLQRRPRHRIQSKQRPLDERALQEKLKDFPVCVSTKPEPEDDAEEGLGGLPSNISSVSSLLLFNTTENLYKKYVFLDPLAGAVTKTHVMLGAETEEKLFDAPLSISKREQLEQQVPENYFYVPDLGQVPEIDVPSYLPDLPGIANDLMYIADLGPGIAPSAPGTIPELPTFHTEVAEPLKVGELGSGMGAGPGTPAHTPSSLDTPHFVFQTYKMGAPPLPPSTAAPVGQGARQDDSSSSASPSVQGAPREVVDPSGGWATLLESIRQAGGIGKAKLRSMKERKLEKQQQKEQEQVRATSQGGHLMSDLFNKLVMRRKGISGKGPGAGDGPGGAFARVSDSIPPLPPPQQPQAEDEDDWES"
    motifs = [
        # seq.start, seq.end, shape, width, height, fgcolor, bgcolor
        [
            10, 100, "[]", None, 10, "black", "rgradient:blue",
            "arial|8|white|domain Name"
        ],
        [110, 150, "o", None, 10, "blue", "pink", None],
        [155, 180, "()", None, 10, "blue", "rgradient:purple", None],
        [160, 170, "^", None, 14, "black", "yellow", None],
        [172, 180, "v", None, 12, "black", "rgradient:orange", None],
        [185, 190, "o", None, 12, "black", "brown", None],
        [198, 200, "<>", None, 15, "black", "rgradient:gold", None],
        [210, 240, "compactseq", 2, 10, None, None, None],
        [300, 320, "seq", 10, 10, None, None, None],
        [310, 345, "<>", None, 15, "black", "rgradient:black", None],
    ]
    # Create a random tree and add to each leaf a random set of motifs
    # from the original set
    t = Tree()
    t.populate(10)
    for l in t.iter_leaves():
        seq_motifs = [list(m) for m in motifs
                      ]  #sample(motifs, randint(2, len(motifs)))

        seqFace = SeqMotifFace(seq,
                               seq_motifs,
                               intermotif_format="line",
                               seqtail_format="compactseq",
                               scale_factor=1)
        seqFace.margin_bottom = 4
        f = l.add_face(seqFace, 0, "aligned")

    return t, TreeStyle()
Beispiel #7
0
C.add_features(vowel=False, confidence=1.0)
A.add_features(vowel=True, confidence=0.5)
ancestor.add_features(nodetype="internal")
# Or, using the oneliner notation
(t & "H").add_features(vowel=False, confidence=0.2)
# But we can automatize this. (note that i will overwrite the previous
# values)
for leaf in t.traverse():
    if leaf.name in "AEIOU":
        leaf.add_features(vowel=True, confidence=random.random())
    else:
        leaf.add_features(vowel=False, confidence=random.random())
# Now we use these information to analyze the tree.
print "This tree has", len(t.search_nodes(vowel=True)), "vowel nodes"
print "Which are", [
    leaf.name for leaf in t.iter_leaves() if leaf.vowel == True
]
# But features may refer to any kind of data, not only simple
# values. For example, we can calculate some values and store them
# within nodes.
#
# Let's detect leaf nodes under "ancestor" with distance higher thatn
# 1. Note that I'm traversing a subtree which starts from "ancestor"
matches = [leaf for leaf in ancestor.traverse() if leaf.dist > 1.0]
# And save this pre-computed information into the ancestor node
ancestor.add_feature("long_branch_nodes", matches)
# Prints the precomputed nodes
print "These are nodes under ancestor with long branches", \
    [n.name for n in ancestor.long_branch_nodes]
# We can also use the add_feature() method to dynamically add new features.
label = raw_input("custom label:")
Beispiel #8
0
def main(argv):
    
    parser = argparse.ArgumentParser(description=__DESCRIPTION__, 
                            formatter_class=argparse.RawDescriptionHelpFormatter)


    input_args = parser.add_argument_group("INPUT OPTIONS")
    input_args.add_argument("source_trees", metavar='source_trees', type=str, nargs="*",
                   help='a list of source tree files')
    
    input_args.add_argument("--source_file", dest="source_file", 
                        type=str, 
                        help="""path to a file containing many source trees, one per line""")

    input_args.add_argument("-r", dest="reftree", 
                        type=str, required=True,
                        help="""Reference tree""")

    input_args.add_argument("--ref_tree_attr", dest="ref_tree_attr", 
                            type=str, default="name",
                            help=("attribute in ref tree used as leaf name"))
    
    input_args.add_argument("--src_tree_attr", dest="src_tree_attr", 
                            type=str, default="name",
                            help=("attribute in source tree used as leaf name"))

    input_args.add_argument("--min_support_ref",
                            type=float, default=0.0,
                        help=("min support for branches to be considered from the ref tree"))
    input_args.add_argument("--min_support_src",
                        type=float, default=0.0,
                        help=("min support for branches to be considered from the source tree"))

    
    output_args = parser.add_argument_group("OUTPUT OPTIONS")
    
    output_args.add_argument("-o", dest="output", 
                            type=str,
                            help="""Path to the tab delimited report file""")

    
    opt_args = parser.add_argument_group("DISTANCE OPTIONS")
    

    opt_args.add_argument("--outgroup", dest="outgroup", 
                        nargs = "+",
                        help="""outgroup used to root reference and source trees before distance computation""")
  
    opt_args.add_argument("--expand_polytomies", dest="polytomies", 
                        action = "store_true",
                        help="""expand politomies if necessary""")
  
    opt_args.add_argument("--unrooted", dest="unrooted", 
                        action = "store_true",
                        help="""compare trees as unrooted""")

    opt_args.add_argument("--min_support", dest="min_support", 
                        type=float, default=0.0,
                        help=("min support value for branches to be counted in the distance computation (RF, treeko and refTree/targeGene compatibility)"))

    opt_args = parser.add_argument_group("PHYLOGENETICS OPTIONS")
    
    opt_args.add_argument("--extract_species",
                        action = "store_true",
                        help="When used, leaf names in the reference and source trees are assumed to represent species."
                          " If target trees are gene-trees whose species information is encoded as a part of the leaf sequence name,"
                          " it can be automatically extracted by providing a Perl regular expression that extract a "
                          " valid species code (see --sp_regexp). Such information will be also used to detect duplication"
                          " events. ")

    opt_args.add_argument("--sp_regexp", 
                          type=str,
                         help=("Specifies a Perl regular expression to automatically extract species names"
                          " from the name string in source trees. If not used, leaf names are assumed to represent species names."
                          " Example: use this expression '[^_]+_(.+)' to extract HUMAN from the string 'P53_HUMAN'."))
        
    opt_args.add_argument("--collateral", 
                        action='store_true', 
                        help=(""))

    
    args = parser.parse_args(argv)
    print __DESCRIPTION__
    reftree = args.reftree
    if args.source_file and args.source_trees:
        print >>sys.stderr, 'The use of targets_file and targets at the same time is not supported.'
        sys.exit(1)
        
    if args.source_file:
        source_trees = tree_iterator(args.source_file)
    else:
        source_trees = args.source_trees
        
    ref_tree = Tree(reftree)

    if args.ref_tree_attr:
        for lf in ref_tree.iter_leaves():
            lf._origname = lf.name
            if args.ref_tree_attr not in lf.features:
                print lf
            lf.name = getattr(lf, args.ref_tree_attr)
    
    if args.outgroup:
        if len(args.outgroup) > 1:
            out = ref_tree.get_common_ancestor(args.outgroup)
        else:
            out = ref_tree.search_nodes(name=args.outgroup[0])[0]
        ref_tree.set_outgroup(out)
                     

    HEADER = ("source tree", 'ref tree', 'common\ntips', 'normRF', 'RF', 'maxRF', "%reftree", "%genetree", "subtrees", "treeko\ndist")
    if args.output:
        OUT = open(args.output, "w")
        print >>OUT, '# ' + ctime()
        print >>OUT, '# ' + ' '.join(sys.argv) 
        print >>OUT, '#'+'\t'.join(HEADER)
    else:
        print '# ' + ctime()
        print '# ' + ' '.join(sys.argv) 
        COL_WIDTHS = [20, 20] + [9] * 10
        print_table([HEADER], fix_col_width=COL_WIDTHS, wrap_style='wrap')
        
                
    prev_tree = None
    ref_fname = os.path.basename(args.reftree)
    for counter, tfile in enumerate(source_trees):
        if args.source_file:
            seedid, tfile = tfile
        else:
            seedid = None
           
        if args.extract_species:

            if args.sp_regexp:
                SPMATCHER = re.compile(args.sp_regexp)
                get_sp_name = lambda x: re.search(SPMATCHER, x).groups()[0]
            else:
                get_sp_name = lambda x: x
                
            tt = PhyloTree(tfile, sp_naming_function = get_sp_name)
        else:
            tt = Tree(tfile)

        if args.src_tree_attr:
            for lf in tt.iter_leaves():
                lf._origname = lf.name
                lf.name = getattr(lf, args.src_tree_attr)
            
        if args.outgroup:
            if len(args.outgroup) > 1:
                out = tt.get_common_ancestor(args.outgroup)
            else:
                out = tt.search_nodes(name=args.outgroup[0])[0]
            tt.set_outgroup(out)
        
        if args.source_trees:
            fname = os.path.basename(tfile)
        else:
            fname = '%05d' %counter                          


            
        r = tt.compare(ref_tree, 
                       ref_tree_attr=args.ref_tree_attr,
                       source_tree_attr=args.src_tree_attr,
                       min_support_ref=args.min_support_ref,
                       min_support_source = args.min_support_src,
                       unrooted=args.unrooted,
                       has_duplications=args.extract_species)

                          

        print_table([map(istr, [fname[-30:], ref_fname[-30:], r['effective_tree_size'], r['norm_rf'],
                               r['rf'], r['max_rf'], r["source_edges_in_ref"],
                               r["ref_edges_in_source"], r['source_subtrees'], r['treeko_dist']])],
                    fix_col_width = COL_WIDTHS, wrap_style='cut')
                          

    if args.output:
        OUT.close()
Beispiel #9
0
import time
from ete_dev import Tree
# Creates a random tree with 10,000 leaf nodes
tree = Tree()
tree.populate(10000)
# This code should be faster
t1 = time.time()
for leaf in tree.iter_leaves():
    if "aw" in leaf.name:
        print "found a match:", leaf.name,
        break
print "Iterating: ellapsed time:", time.time()-t1
# This slower
t1 = time.time()
for leaf in tree.get_leaves():
    if "aw" in leaf.name:
        print "found a match:", leaf.name,
        break
print "Getting: ellapsed time:", time.time()-t1
# Results in something like:
# found a match: guoaw Iterating: ellapsed time: 0.00436091423035 secs
# found a match: guoaw Getting: ellapsed time: 0.124316930771 secs
Beispiel #10
0
import time
from ete_dev import Tree
# Creates a random tree with 10,000 leaf nodes
tree = Tree()
tree.populate(10000)
# This code should be faster
t1 = time.time()
for leaf in tree.iter_leaves():
    if "aw" in leaf.name:
        print "found a match:", leaf.name,
        break
print "Iterating: ellapsed time:", time.time() - t1
# This slower
t1 = time.time()
for leaf in tree.get_leaves():
    if "aw" in leaf.name:
        print "found a match:", leaf.name,
        break
print "Getting: ellapsed time:", time.time() - t1
# Results in something like:
# found a match: guoaw Iterating: ellapsed time: 0.00436091423035 secs
# found a match: guoaw Getting: ellapsed time: 0.124316930771 secs
Beispiel #11
0
style1["fgcolor"] = "#0f0f0f"
style1["size"] = 0
style1["vt_line_color"] = "#ff0000"
style1["hz_line_color"] = "#ff0000"
style1["vt_line_width"] = 2
style1["hz_line_width"] = 2
style1["vt_line_type"] = 2 # 0 solid, 1 dashed, 2 dotted
style1["hz_line_type"] = 2
t.children[0].img_style = style1
t.children[1].img_style = style1

# Set dashed blue lines in all leaves
style2 = NodeStyle()
style2["fgcolor"] = "#000000"
style2["shape"] = "circle"
style2["vt_line_color"] = "#0000aa"
style2["hz_line_color"] = "#0000aa"
style2["vt_line_width"] = 2
style2["hz_line_width"] = 2
style2["vt_line_type"] = 1 # 0 solid, 1 dashed, 2 dotted
style2["hz_line_type"] = 1
for l in t.iter_leaves():
    l.img_style = style2


ts = TreeStyle()
ts.layout_fn = layout
ts.show_leaf_name = False
t.render("node_style.png", w=400, tree_style=ts)

Beispiel #12
0
aligned_c0_r0 = TextFace("aligned_col0_row0")
aligned_c0_r1 = TextFace("aligned_col0_row1")

aligned_c1_r0 = TextFace("aligned_col1_row0")
aligned_c1_r1 = TextFace("aligned_col1_row1")


t.add_face(right_c0_r1, column=1, position="branch-right")
t.add_face(right_c0_r0, column=0, position="branch-right")

t.add_face(right_c1_r2, column=2, position="branch-right")
t.add_face(right_c1_r1, column=1, position="branch-right")
t.add_face(right_c1_r0, column=0, position="branch-right")

t.add_face(top_c0_r1, column=1, position="branch-top")
t.add_face(top_c0_r0, column=0, position="branch-top")

t.add_face(bottom_c0_r0, column=0, position="branch-bottom")
t.add_face(bottom_c1_r0, column=1, position="branch-bottom")

for leaf in t.iter_leaves():
    leaf.add_face(aligned_c0_r1, 0, "aligned")
    leaf.add_face(aligned_c0_r0, 0, "aligned")
    leaf.add_face(aligned_c1_r1, 0, "aligned")
    leaf.add_face(aligned_c1_r0, 0, "aligned")


t.show()

Beispiel #13
0
#  used to add many features at the same time.
C.add_features(vowel=False, confidence=1.0)
A.add_features(vowel=True, confidence=0.5)
ancestor.add_features(nodetype="internal")
# Or, using the oneliner notation
(t&"H").add_features(vowel=False, confidence=0.2)
# But we can automatize this. (note that i will overwrite the previous
# values)
for leaf in t.traverse():
    if leaf.name in "AEIOU":
        leaf.add_features(vowel=True, confidence=random.random())
    else:
        leaf.add_features(vowel=False, confidence=random.random())
# Now we use these information to analyze the tree.
print "This tree has", len(t.search_nodes(vowel=True)), "vowel nodes"
print "Which are", [leaf.name for leaf in t.iter_leaves() if leaf.vowel==True]
# But features may refer to any kind of data, not only simple
# values. For example, we can calculate some values and store them
# within nodes.
#
# Let's detect leaf nodes under "ancestor" with distance higher thatn
# 1. Note that I'm traversing a subtree which starts from "ancestor"
matches = [leaf for leaf in ancestor.traverse() if leaf.dist>1.0]
# And save this pre-computed information into the ancestor node
ancestor.add_feature("long_branch_nodes", matches)
# Prints the precomputed nodes
print "These are nodes under ancestor with long branches", \
    [n.name for n in ancestor.long_branch_nodes]
# We can also use the add_feature() method to dynamically add new features.
label = raw_input("custom label:")
value = raw_input("custom label value:")
Beispiel #14
0
def main(argv):

    parser = argparse.ArgumentParser(
        description=__DESCRIPTION__,
        formatter_class=argparse.RawDescriptionHelpFormatter)

    input_args = parser.add_argument_group("INPUT OPTIONS")
    input_args.add_argument("source_trees",
                            metavar='source_trees',
                            type=str,
                            nargs="*",
                            help='a list of source tree files')

    input_args.add_argument(
        "--source_file",
        dest="source_file",
        type=str,
        help="""path to a file containing many source trees, one per line""")

    input_args.add_argument("-r",
                            dest="reftree",
                            type=str,
                            required=True,
                            help="""Reference tree""")

    input_args.add_argument("--ref_tree_attr",
                            dest="ref_tree_attr",
                            type=str,
                            default="name",
                            help=("attribute in ref tree used as leaf name"))

    input_args.add_argument(
        "--src_tree_attr",
        dest="src_tree_attr",
        type=str,
        default="name",
        help=("attribute in source tree used as leaf name"))

    input_args.add_argument(
        "--min_support_ref",
        type=float,
        default=0.0,
        help=("min support for branches to be considered from the ref tree"))
    input_args.add_argument(
        "--min_support_src",
        type=float,
        default=0.0,
        help=(
            "min support for branches to be considered from the source tree"))

    output_args = parser.add_argument_group("OUTPUT OPTIONS")

    output_args.add_argument("-o",
                             dest="output",
                             type=str,
                             help="""Path to the tab delimited report file""")

    opt_args = parser.add_argument_group("DISTANCE OPTIONS")

    opt_args.add_argument(
        "--outgroup",
        dest="outgroup",
        nargs="+",
        help=
        """outgroup used to root reference and source trees before distance computation"""
    )

    opt_args.add_argument("--expand_polytomies",
                          dest="polytomies",
                          action="store_true",
                          help="""expand politomies if necessary""")

    opt_args.add_argument("--unrooted",
                          dest="unrooted",
                          action="store_true",
                          help="""compare trees as unrooted""")

    opt_args.add_argument(
        "--min_support",
        dest="min_support",
        type=float,
        default=0.0,
        help=
        ("min support value for branches to be counted in the distance computation (RF, treeko and refTree/targeGene compatibility)"
         ))

    opt_args = parser.add_argument_group("PHYLOGENETICS OPTIONS")

    opt_args.add_argument(
        "--extract_species",
        action="store_true",
        help=
        "When used, leaf names in the reference and source trees are assumed to represent species."
        " If target trees are gene-trees whose species information is encoded as a part of the leaf sequence name,"
        " it can be automatically extracted by providing a Perl regular expression that extract a "
        " valid species code (see --sp_regexp). Such information will be also used to detect duplication"
        " events. ")

    opt_args.add_argument(
        "--sp_regexp",
        type=str,
        help=
        ("Specifies a Perl regular expression to automatically extract species names"
         " from the name string in source trees. If not used, leaf names are assumed to represent species names."
         " Example: use this expression '[^_]+_(.+)' to extract HUMAN from the string 'P53_HUMAN'."
         ))

    opt_args.add_argument("--collateral", action='store_true', help=(""))

    args = parser.parse_args(argv)
    print __DESCRIPTION__
    reftree = args.reftree
    if args.source_file and args.source_trees:
        print >> sys.stderr, 'The use of targets_file and targets at the same time is not supported.'
        sys.exit(1)

    if args.source_file:
        source_trees = tree_iterator(args.source_file)
    else:
        source_trees = args.source_trees

    ref_tree = Tree(reftree)

    if args.ref_tree_attr:
        for lf in ref_tree.iter_leaves():
            lf._origname = lf.name
            if args.ref_tree_attr not in lf.features:
                print lf
            lf.name = getattr(lf, args.ref_tree_attr)

    if args.outgroup:
        if len(args.outgroup) > 1:
            out = ref_tree.get_common_ancestor(args.outgroup)
        else:
            out = ref_tree.search_nodes(name=args.outgroup[0])[0]
        ref_tree.set_outgroup(out)

    HEADER = ("source tree", 'ref tree', 'common\ntips', 'normRF', 'RF',
              'maxRF', "%reftree", "%genetree", "subtrees", "treeko\ndist")
    if args.output:
        OUT = open(args.output, "w")
        print >> OUT, '# ' + ctime()
        print >> OUT, '# ' + ' '.join(sys.argv)
        print >> OUT, '#' + '\t'.join(HEADER)
    else:
        print '# ' + ctime()
        print '# ' + ' '.join(sys.argv)
        COL_WIDTHS = [20, 20] + [9] * 10
        print_table([HEADER], fix_col_width=COL_WIDTHS, wrap_style='wrap')

    prev_tree = None
    ref_fname = os.path.basename(args.reftree)
    for counter, tfile in enumerate(source_trees):
        if args.source_file:
            seedid, tfile = tfile
        else:
            seedid = None

        if args.extract_species:

            if args.sp_regexp:
                SPMATCHER = re.compile(args.sp_regexp)
                get_sp_name = lambda x: re.search(SPMATCHER, x).groups()[0]
            else:
                get_sp_name = lambda x: x

            tt = PhyloTree(tfile, sp_naming_function=get_sp_name)
        else:
            tt = Tree(tfile)

        if args.src_tree_attr:
            for lf in tt.iter_leaves():
                lf._origname = lf.name
                lf.name = getattr(lf, args.src_tree_attr)

        if args.outgroup:
            if len(args.outgroup) > 1:
                out = tt.get_common_ancestor(args.outgroup)
            else:
                out = tt.search_nodes(name=args.outgroup[0])[0]
            tt.set_outgroup(out)

        if args.source_trees:
            fname = os.path.basename(tfile)
        else:
            fname = '%05d' % counter

        r = tt.compare(ref_tree,
                       ref_tree_attr=args.ref_tree_attr,
                       source_tree_attr=args.src_tree_attr,
                       min_support_ref=args.min_support_ref,
                       min_support_source=args.min_support_src,
                       unrooted=args.unrooted,
                       has_duplications=args.extract_species)

        print_table([
            map(istr, [
                fname[-30:], ref_fname[-30:], r['effective_tree_size'],
                r['norm_rf'], r['rf'], r['max_rf'], r["source_edges_in_ref"],
                r["ref_edges_in_source"], r['source_subtrees'],
                r['treeko_dist']
            ])
        ],
                    fix_col_width=COL_WIDTHS,
                    wrap_style='cut')

    if args.output:
        OUT.close()
Beispiel #15
0
    ],
    [160, 170, "^", 50, 14, "black", "yellow", None],
    [172, 180, "v", 20, 12, "black", "rgradient:orange", None],
    [185, 190, "o", 12, 12, "black", "brown", "Arial|12|white|Domain"],
    [198, 200, "<>", 15, 15, "black", "rgradient:gold", None],
    [210, 240, "compactseq", 2, 10, None, None, None],
    [300, 320, "seq", 20, 20, None, None, "Courier|10|red|TestName"],
    [
        340, 350, "<>", 15, 15, "black", "rgradient:black",
        "Arial|12|black|HOLASEQ"
    ],
]

# Create a random tree and add to each leaf a random set of motifs
# from the original set
t = Tree()
t.populate(20)
for l in t.iter_leaves():
    # For each leaf, we create a random list of motifs from the original list.
    seq_motifs = sample(motifs, randint(2, len(motifs)))
    # And we add it as a Sequence Motif Face.
    seqFace = SeqMotifFace(seq,
                           seq_motifs,
                           intermotif_format="line",
                           seqtail_format="compactseq")
    seqFace.margin_bottom = 4
    f = l.add_face(seqFace, 0, "aligned")

t.render("seq_motif_faces.png", w=1000)
#t.show()