예제 #1
0
    def test_shortcut_functions(self):
        """Exercise the shortcut functions usable inside TreePattern expressions.

        A small gene tree is annotated with evolutionary events and then
        searched with three patterns built on ``n_duplications``,
        ``contains_leaves`` and ``n_speciations``. The number of hits, and the
        identity of selected hits, are asserted.
        """
        tree = PhyloTree(
            """((((Human_1, Chimp_1), (Human_2, (Chimp_2, Chimp_3))),
            ((Fish_1, (Human_3, Fish_3)), Yeast_2)), Yeast_1);""")
        tree.set_species_naming_function(lambda node: node.name.split("_")[0])
        tree.get_descendant_evol_events()  # DDDSSSDDS
        top = tree.get_tree_root()

        def every_match(expression):
            # Compile the expression and materialise what find_match yields.
            return list(TreePattern(expression).find_match(tree, maxhits=None))

        # Detects two consecutive nodes with duplications
        duplication_hits = every_match(
            """('n_duplications(@) > 0')'n_duplications(@) > 0 '; """)
        chimp_hits = every_match(
            """( 'contains_leaves(@, ["Chimp_2", "Chimp_3"])'); """)
        speciation_hits = every_match("""'n_speciations(@) > 3 '; """)

        self.assertEqual(len(duplication_hits), 5)

        self.assertEqual(len(chimp_hits), 4)
        self.assertEqual(chimp_hits[0], top)

        self.assertEqual(len(speciation_hits), 2)
        first_hit, second_hit = speciation_hits
        self.assertEqual(first_hit, top)
        self.assertEqual(second_hit, top.children[0])
예제 #2
0
def run(args):
    """Print the in/out sequences of every evolutionary event of each tree.

    Each newick yielded by ``args.src_tree_iterator`` is consumed; a tree is
    only built and analysed when ``args.orthologs`` is not None.
    """
    from ete3 import Tree, PhyloTree
    for newick in args.src_tree_iterator:
        if args.orthologs is None:
            # Nothing requested for this tree; keep draining the iterator.
            continue
        gene_tree = PhyloTree(newick)
        for event in gene_tree.get_descendant_evol_events():
            print(event.in_seqs, event.out_seqs)
예제 #3
0
파일: ete_extract.py 프로젝트: Ward9250/ete
def run(args):
    """Report evolutionary events for the source trees.

    Iterates ``args.src_tree_iterator``; when ``args.orthologs`` is set (not
    None) each newick is loaded as a ``PhyloTree`` and the ``in_seqs`` and
    ``out_seqs`` of every detected event are printed.
    """
    from ete3 import Tree, PhyloTree
    for source_newick in args.src_tree_iterator:
        wants_orthologs = args.orthologs is not None
        if not wants_orthologs:
            continue
        detected = PhyloTree(source_newick).get_descendant_evol_events()
        for evol_event in detected:
            print(evol_event.in_seqs, evol_event.out_seqs)
예제 #4
0
    # NOTE(review): the enclosing ``def`` is not visible in this excerpt.
    # These two names are presumably consumed by ``extract_spname`` (and
    # declared ``global`` above) — confirm against the full file.
    SPECIES_NAME_POS = args.species_field
    SPECIES_NAME_DELIMITER = args.species_delimiter

    # load a phylomeDB Tree provided as a newick file in the command line
    t = PhyloTree(newick, sp_naming_function=extract_spname)

    # Optional re-rooting: several names -> root on their common ancestor,
    # a single name -> root on the node that ``t & name`` resolves to.
    if args.root:
        if len(args.root) > 1:
            outgroup = t.get_common_ancestor(args.root)
        else:
            outgroup = t & args.root[0]
        t.set_outgroup(outgroup)

    if not args.skip_ortholog_detection:
        # detect speciation and duplication events using the species overlap
        # algorithm used in phylomeDB
        t.get_descendant_evol_events()

    # Optional text rendering showing the event-type attribute and node name
    # on every node, internal ones included.
    if args.ascii:
        print(
            t.get_ascii(attributes=[args.evoltype_attr, "name"],
                        show_internal=True))

    # Optional newick dump carrying the event-type attribute as a feature.
    if args.newick:
        print(t.write(features=[args.evoltype_attr], format_root_node=True))

    # Optional call to the tree's show() method.
    if args.show:
        t.show()

    # The orthoXML export always runs, whichever optional outputs were chosen.
    export_as_orthoXML(t, args.database, args.evoltype_attr)
# Attach the species-naming function, then list each leaf with its species.
tphy.set_species_naming_function(get_species_name)
for leaf in tphy.get_leaves():
    print("node:", leaf.name, "Species name:", leaf.species)

# In[]:
# Evolutionary events via reconciliation of the gene tree with tsps.
tree_rec, evev_rec = tphy.reconcile(tsps)

# In[]:
print(tree_rec)
tree_rec.show()

# In[]:
# Evolutionary events via the species overlap approach.
evev = tphy.get_descendant_evol_events()

# For "S" events, print the orthologs once per in-sequence whose name
# starts with "Lepsal".
for ev in evev:
    if ev.etype != "S":
        continue
    for seq_name in ev.in_seqs:
        if seq_name.startswith("Lepsal"):
            print(ev.orthologs)


def fseqs(slist):
    """Return the entries of *slist* whose name starts with "Drer" or "Hsap"."""
    return [s for s in slist if s.startswith(("Drer", "Hsap"))]


# NOTE(review): the header below mentions Anogam/Anosin orthology, while the
# loop prints Drer/Hsap sequences of "D" events labelled as paralogs — confirm
# which wording is intended.
print("\nOrthology relationships among Anogam and Anosin")
for ev in evev:
    if ev.etype != "D":
        continue
    in_side = ','.join(fseqs(ev.in_seqs))
    out_side = ','.join(fseqs(ev.out_seqs))
    print('Paralog: ', in_side, "<====>", out_side)
예제 #6
0
from ete3 import PhyloTree

# Load the tree from its newick file.
phy = PhyloTree("adar_hol.01.iqt.contree.newick")

# The species of a leaf is the part of its name before the first "_";
# list every leaf together with the species it was assigned.
phy.set_species_naming_function(lambda node: node.name.split("_")[0])
for leaf in phy.get_leaves():
    print("node:", leaf.name, "Species name:", leaf.species)

# Root the tree on its midpoint outgroup.
phy_outgroup = phy.get_midpoint_outgroup()
phy.set_outgroup(phy_outgroup)

# Detect evolutionary events and print the orthologs of each "S" event.
evev = phy.get_descendant_evol_events(sos_thr=0.9)

for ev in evev:
    if ev.etype != "S":
        continue
    print(ev.orthologs)

# Detect the events again (same threshold) before the full listing.
evev = phy.get_descendant_evol_events(sos_thr=0.9)

# Every event: its type followed by both sides of the split.
for ev in evev:
    in_side = ','.join(ev.in_seqs)
    out_side = ','.join(ev.out_seqs)
    print(ev.etype, in_side, "<====>", out_side)

# all events involving either Hsap or Drer
fseqs = lambda slist: [
    s for s in slist if s.startswith("Drer") or s.startswith("Hsap")
def _relationship_line(label, ev):
    """Build one report line for event *ev*, prefixed by *label*.

    The four fields are joined with single spaces, which is exactly what the
    Python 2 ``print a, b, c, d`` statement used to emit, so the output is
    unchanged while the code now also runs on Python 3.
    """
    return ' '.join(
        (label, ','.join(ev.in_seqs), "<====>", ','.join(ev.out_seqs)))


# To obtain all the evolutionary events involving a given leaf node we
# use get_my_evol_events method
matches = t.search_nodes(name="Hsa_001")
human_seq = matches[0]
# Obtains its evolutionary events
events = human_seq.get_my_evol_events()
# Print its orthology and paralogy relationships
# (single-argument print(...) behaves the same on Python 2 and Python 3)
print("Events detected that involve Hsa_001:")
for ev in events:
    if ev.etype == "S":
        print(_relationship_line('   ORTHOLOGY RELATIONSHIP:', ev))
    elif ev.etype == "D":
        print(_relationship_line('   PARALOGY RELATIONSHIP:', ev))

# Alternatively, you can scan the whole tree topology
events = t.get_descendant_evol_events()
# Print its orthology and paralogy relationships
print("Events detected from the root of the tree")
for ev in events:
    if ev.etype == "S":
        print(_relationship_line('   ORTHOLOGY RELATIONSHIP:', ev))
    elif ev.etype == "D":
        print(_relationship_line('   PARALOGY RELATIONSHIP:', ev))

# If we are only interested in the orthology and paralogy relationship
# among a given set of species, we can filter the list of sequences
#
# fseqs is a function that, given a list of sequences, returns only
# those from human and mouse
fseqs = lambda slist: [s for s in slist if s.startswith("Hsa") or s.startswith("Mms")]
print("Paralogy relationships among human and mouse")
예제 #8
0
def process_tree(treepath):
    """Extract orthology relationships between the target taxid and other species.

    The tree is loaded from ``treepath``. Every leaf is annotated with a
    ``taxid`` (the integer prefix of its name, before the first ".") and a
    sanitised ``good_name``. Clades made only of the target species are
    optionally collapsed (module-level ``collapse == 'yes'``), and the tree
    is rooted on its midpoint outgroup. Each "S" event that has the target
    taxid on one side then yields one output line per species found on the
    other side.

    Relies on module-level names defined elsewhere in the file:
    ``get_species``, ``conversion``, ``names``, ``target_taxid`` and
    ``collapse``.

    Args:
        treepath: Tree source handed to ``PhyloTree`` after ``str()`` and
            ``rstrip()``.

    Returns:
        A list of tab-separated lines (co-orthologs, orthology type, species
        taxid, species name, orthologs, then a trailing newline field), or
        ``None`` when the outgroup cannot be set on a single-leaf tree.

    Raises:
        Whatever ``set_outgroup`` raised, when the tree has more than one
        leaf.
    """
    treepath = str(treepath).rstrip()
    t = PhyloTree(treepath, sp_naming_function=get_species)

    # Annotate every leaf with its taxid (as str) and a display name.
    for leaf in t:
        leaf.taxid = str(int(leaf.name.split(".", 1)[0]))

        # Best-effort rename: any failure of the conversion lookup falls back
        # to the raw leaf name (Exception rather than a bare except, so that
        # KeyboardInterrupt/SystemExit still propagate).
        try:
            good_name = "%s" % (conversion[leaf.name][0])
        except Exception:
            good_name = leaf.name

        # Raw string: same character class as before, without the invalid
        # "\]" / "\[" string escapes.
        leaf.good_name = re.sub(r"[ |\t,:)(;\n\]\[]+", "_", good_name)

    t.dist = 0

    # Leaf content per node, cached before any collapsing so that the names
    # of detached leaves stay available below.
    node2content = t.get_cached_content()
    target_species = set([target_taxid])

    def is_sp_specific(_node):
        # True when every leaf under _node belongs to the target species.
        return not (
            set(_leaf.species for _leaf in node2content[_node])
            - target_species)

    # Collapse clades made only of target-species leaves into single nodes.
    if collapse == 'yes':
        for n in t.get_leaves(is_leaf_fn=is_sp_specific):
            if not n.children:
                continue
            for ch in n.get_children():
                ch.detach()
            # Stored as str like every other leaf.taxid, so the comparisons
            # against str(target_taxid) below also match collapsed nodes.
            n.taxid = str(target_taxid)
            members = node2content[n]
            n.name = '|'.join([_lf.name for _lf in members])
            n.good_name = "{%s}" % ('|'.join(
                [_lf.good_name for _lf in members]))

    # Root on the midpoint outgroup; a single-leaf tree cannot be rooted and
    # produces no result.
    outgroup = t.get_midpoint_outgroup()
    try:
        t.set_outgroup(outgroup)
    except Exception:
        if len(t) == 1:
            return
        raise

    # Refresh the cache: rooting and collapsing changed the topology.
    node2content = t.get_cached_content()
    target = str(target_taxid)

    event_lines = []
    for ev in t.get_descendant_evol_events():
        if ev.etype != "S":
            continue

        first_side = node2content[ev.node.children[0]]
        second_side = node2content[ev.node.children[1]]

        # The side holding the target taxid is the source; events without
        # the target on either side are skipped.
        if any(leaf.taxid == target for leaf in first_side):
            source_seqs, ortho_seqs = first_side, second_side
        elif any(leaf.taxid == target for leaf in second_side):
            source_seqs, ortho_seqs = second_side, first_side
        else:
            continue

        # Target-species sequences on the source side.
        co_orthologs = sorted(
            leaf.good_name for leaf in source_seqs if leaf.taxid == target)

        # Sequences of the other side, grouped by species taxid.
        orthologs = defaultdict(set)
        for leaf in ortho_seqs:
            orthologs[int(leaf.taxid)].add(leaf.good_name)

        _otype = "one-to-" if len(co_orthologs) == 1 else "many-to-"

        # .items() works on both Python 2 and 3 (iteritems is Python-2-only).
        for sp, orth in orthologs.items():
            otype = _otype + ("one" if len(orth) == 1 else "many")
            event_lines.append('\t'.join([
                ','.join(co_orthologs), otype,
                str(sp), names[sp], ','.join(sorted(orth)), '\n'
            ]))
    return event_lines
def process_tree(treepath):
    """Extract orthology relationships between the target taxid and other species.

    The tree is loaded from ``treepath`` and rooted on its midpoint outgroup.
    Every leaf gets a ``taxid`` (the text before the first "." of its name)
    and, with ``args.conv_table``, a ``good_name``. Clades made only of the
    target species are optionally collapsed (module-level
    ``collapse == 'yes'``). Each "S" event then yields one output line per
    species found on the orthologous side and, with ``args.pairs_table``,
    every source/ortholog name pair.

    Relies on module-level names defined elsewhere in the file: ``args``,
    ``get_species``, ``ncbi``, ``conversion``, ``target_taxid`` and
    ``collapse``.

    Args:
        treepath: Tree source handed to ``PhyloTree`` after ``str()`` and
            ``rstrip()``.

    Returns:
        With ``args.pairs_table``: a tuple ``(event_lines, pairs)`` where
        ``pairs`` is a list of ``(source_name, ortholog_name)`` tuples.
        Otherwise: the list of tab-separated ``event_lines`` only. When the
        tree cannot be rooted the result is empty (``([], [])`` or ``[]``),
        except in pairs mode with more than one leaf, where the first leaf is
        used as outgroup and processing continues.
    """
    treepath = str(treepath).rstrip()
    t = PhyloTree(treepath, sp_naming_function=get_species)
    treefile = os.path.basename(treepath)
    t.dist = 0

    outgroup = t.get_midpoint_outgroup()
    try:
        t.set_outgroup(outgroup)
        t.standardize()
    except Exception:
        # Exception rather than a bare except, so KeyboardInterrupt and
        # SystemExit still propagate. Messages are written exactly as before.
        single_leaf = len(t) == 1
        sys.stderr.write(
            treefile + ('len(t) == 1' if single_leaf else 'len(t) != 1')
            + '\n')
        if not args.pairs_table:
            return []
        if single_leaf:
            return ([], [])
        # Pairs mode, several leaves: fall back to rooting on the first leaf.
        t.set_outgroup(t.get_leaf_names()[0])

    # Species id -> scientific name ('' when the lookup fails).
    names = {}
    for leaf in t:
        sp = str(leaf.name.split('.')[0])
        leaf.taxid = sp
        # Only the lookup is guarded, so the handler can never write to a
        # stale or undefined key.
        try:
            names[sp] = ncbi.get_taxid_translator([sp])[int(sp)]
        except Exception:
            names[sp] = ''

        if args.conv_table:
            try:
                good_name = "%s" % (conversion[leaf.name][0])
            except Exception:
                good_name = leaf.name
            leaf.good_name = good_name

    # Leaf content per node, cached before collapsing so the names of
    # detached leaves stay available below.
    node2content = t.get_cached_content()
    target_species = set([target_taxid])

    def is_sp_specific(_node):
        # True when every leaf under _node belongs to the target species.
        return not (
            set(_leaf.species for _leaf in node2content[_node])
            - target_species)

    # Collapse clades made only of target-taxid leaves into single nodes.
    if collapse == 'yes':
        for n in t.get_leaves(is_leaf_fn=is_sp_specific):
            if not n.children:
                continue
            for ch in n.get_children():
                ch.detach()
            # Stored as str like every other leaf.taxid, so the comparisons
            # against str(target_taxid) below also match collapsed nodes.
            n.taxid = str(target_taxid)
            members = node2content[n]
            n.name = "{%s}" % ('|'.join([_lf.name for _lf in members]))
            if args.conv_table:
                n.good_name = "{%s}" % ('|'.join(
                    [_lf.good_name for _lf in members]))

    # Attribute used as the display name of a leaf in all outputs.
    label_attr = "good_name" if args.conv_table else "name"

    all_ortholgs_pairs = []
    event_lines = []

    for ev in t.get_descendant_evol_events():
        if ev.etype != "S":
            continue

        # Child nodes; iterating a node yields its leaves.
        source_seqs = ev.node.children[0]
        ortho_seqs = ev.node.children[1]

        if target_taxid:
            target = str(target_taxid)
            # The side holding the target taxid becomes the source; events
            # without the target on either side are skipped.
            if any(leaf.taxid == target for leaf in source_seqs):
                pass
            elif any(leaf.taxid == target for leaf in ortho_seqs):
                source_seqs, ortho_seqs = ortho_seqs, source_seqs
            else:
                continue

        # Leaf labels of both sides, in tree iteration order.
        source_seqs_names = [
            getattr(leaf, label_attr) for leaf in source_seqs]
        ortho_seqs_names = [
            getattr(leaf, label_attr) for leaf in ortho_seqs]

        co_orthologs = sorted(source_seqs_names)

        # Ortholog labels grouped by the species prefix of the leaf name.
        orthologs = defaultdict(set)
        for leaf in ortho_seqs:
            sp = str(leaf.name.split('.')[0])
            orthologs[sp].add(getattr(leaf, label_attr))

        _otype = "one-to-" if len(source_seqs) == 1 else "many-to-"

        for sp, orth in orthologs.items():
            otype = _otype + ("one" if len(orth) == 1 else "many")
            event_lines.append('\t'.join([
                ','.join(co_orthologs), otype,
                str(sp), ','.join(sorted(orth)), treefile, names[sp], '\n'
            ]))

        if args.pairs_table:
            # Add this event's pairs directly. The previous code re-scanned
            # every stored product iterator on each event and was only
            # correct because the earlier ones were already exhausted.
            all_ortholgs_pairs.extend(
                itertools.product(source_seqs_names, ortho_seqs_names))

    if args.pairs_table:
        return (event_lines, all_ortholgs_pairs)
    return event_lines