## From a newick tree and a list of samples, find the TMRCA of these samples
from ete3 import PhyloTree
from optparse import OptionParser

# Command-line interface: a newick tree file and a file listing the samples.
parser = OptionParser()
parser.add_option("-f", "--file", dest="filename",
                  help="File containing newick tree", metavar="FILE")
parser.add_option("-s", "--species", dest="speciesList",
                  help="file containing list of wanted species, one per line",
                  metavar="FILE")
(options, args) = parser.parse_args()

# Both inputs are mandatory: stop with a usage message instead of failing
# later with a traceback from open(None).
if not options.filename or not options.speciesList:
    parser.error("both --file and --species are required")

# NOTE(review): format=1 is the ete3 newick flavour that keeps internal node
# names, which is what gets printed below -- confirm against the input trees.
t = PhyloTree(options.filename, format=1)

# One sample name per line.  Surrounding whitespace and blank lines (such as
# a trailing empty line) are dropped so they are never looked up as node
# names.
with open(options.speciesList) as f:
    liste = [line.strip() for line in f if line.strip()]

if not liste:
    parser.error("the species list file does not contain any sample name")

# Common ancestor of all requested samples; print() with a single argument
# behaves the same on Python 2 and Python 3.
pp = t.get_common_ancestor(liste)
print(pp.name)
예제 #2
0
#                       \--------|
#                                 \-Mmu_002
# Create a dictionary with relative ages for the species present in
# the phylogenetic tree.  Note that ages are only relative numbers to
# define which species are older, and that different species can
# belong to the same age.
species2age = {
    "Hsa": 1,  # Homo sapiens (hominids)
    "Ptr": 2,  # Pan troglodytes (primates)
    "Mmu": 2,  # Macaca mulatta (primates)
    "Mms": 3,  # Mus musculus (mammals)
    "Cfa": 3,  # Canis familiaris (mammals)
    "Dme": 4,  # Drosophila melanogaster (metazoa)
}
# Translate each relative age into the name of its taxonomic group.
age2name = {1: "hominids", 2: "primates", 3: "mammals", 4: "metazoa"}
event1 = t.get_common_ancestor("Hsa_001", "Hsa_004")
event2 = t.get_common_ancestor("Hsa_001", "Hsa_002")
# print() with a single pre-formatted string produces the same output on
# Python 2 and Python 3.  The two spaces after "dated at:" reproduce what the
# original comma-separated print statement emitted.
print("")
print("The duplication event leading to the human sequences Hsa_001 and "
      "Hsa_004 is dated at:  %s" % age2name[event1.get_age(species2age)])
print("The duplication event leading to the human sequences Hsa_001 and "
      "Hsa_002 is dated at:  %s" % age2name[event2.get_age(species2age)])
# The duplication event leading to the human sequences Hsa_001 and Hsa_004
# is dated at:  primates
#
# The duplication event leading to the human sequences Hsa_001 and Hsa_002
# is dated at:  mammals
예제 #3
0
        help=("print the extended newick format for provided tree using"
              " ASCII representation and all its evolutionary events"
              " before orthoXML export"))

    args = parser.parse_args()
    newick = args.tree[0]

    SPECIES_NAME_POS = args.species_field
    SPECIES_NAME_DELIMITER = args.species_delimiter

    # load a phylomeDB Tree provided as a newick file in the command line
    t = PhyloTree(newick, sp_naming_function=extract_spname)

    if args.root:
        if len(args.root) > 1:
            outgroup = t.get_common_ancestor(args.root)
        else:
            outgroup = t & args.root[0]
        t.set_outgroup(outgroup)

    if not args.skip_ortholog_detection:
        # detect speciation and duplication events using the species overlap
        # algorithm used in phylomeDB
        t.get_descendant_evol_events()

    if args.ascii:
        print(
            t.get_ascii(attributes=[args.evoltype_attr, "name"],
                        show_internal=True))

    if args.newick:
예제 #4
0
        File.write("\n".join(HomologySummary)+"\n")


# Label every node of the reconciled tree (ND, S, Ev) and collect one summary
# line per loss or duplication event.
EventSummary = []
i = 0
for n in recon_tree.traverse("postorder"):
    # ND is the running index of the node in postorder.
    n.ND = i
    if n.is_leaf():
        # Leaves take the identifier registered for their own species.
        n.S = sp_dict[n.species]
    # hasattr() asks for the attribute directly instead of building and
    # scanning the complete dir() listing for every node.
    if hasattr(n, "evoltype"):
        n.Ev = n.evoltype
        if n.evoltype == "L":
            EventSummary.append("event(%i,loss)" % (n.S))
        elif n.evoltype == "D":
            # A duplication is mapped onto the species-tree node that is the
            # common ancestor of every species found below it.
            sp_dup = n.get_species()
            oldest_sp = sptree.get_common_ancestor(sp_dup)
            n.S = oldest_sp.S
            logger.debug("sp_dup: %s ", sp_dup)
            EventSummary.append("event(%i,duplication)" % (n.S))
    else:
        # Nodes without an explicit event type are tagged "S".
        n.Ev = "S"
    # NOTE(review): n.S is only assigned above for leaves and duplication
    # nodes; every other node must already carry it or the line logging S
    # raises AttributeError -- confirm against the code that builds recon_tree.
    logger.debug("name: %s", n.name)
    logger.debug("S: %s", n.S)
    logger.debug("Ev: %s", n.Ev)
    logger.debug("ND: %s", n.ND)
    i += 1


# One event per line, with a trailing newline.
EventsFile = OutPrefixName + ".events.txt"
with open(EventsFile, "w") as File:
    File.write("\n".join(EventSummary) + "\n")
예제 #5
0
    lambda node: node.name.split("_")[0])  # n.species, n.name
t.set_outgroup(t & outgroup)
# Species groups whose monophyly is tested in every tree of `treelist`.
taxon = [["parensis", "longipalpusC", "vaneedeni"],
         ["funestus", "funestuscf", "vaneedeni"]]
# Maps the index of each group in `taxon` to a pair of lists
# (branch lengths, support values) of its ancestor node, with one entry per
# tree in which the group is monophyletic.
taxdict = {}
for i, tax in enumerate(taxon):
    nodesupport = []
    nodeage = []
    # A dedicated loop name keeps the rooted tree `t` above from being
    # overwritten by the last element of `treelist`.
    for tree in treelist:
        # ete3's check_monophyly() returns a tuple
        # (is_monophyletic, clade_type, breaking_leaves).  A non-empty tuple
        # is always truthy, so the boolean has to be taken out explicitly.
        is_monophyletic = tree.check_monophyly(
            values=tax, target_attr="species")[0]
        if not is_monophyletic:
            continue
        samples = []
        for sp in tax:
            samples.extend(tree.search_nodes(species=sp))
        ancnode = tree.get_common_ancestor(samples)
        nodeage.append(ancnode.dist)
        nodesupport.append(ancnode.support)
    taxdict[i] = (nodeage, nodesupport)

    if not winlist:
        winarray = np.ones(len(treelist), dtype=bool)
    mtreelist, winarray = getMonophyletic(treelist, quart, winarray)
    btreelist, winarray = supportFilt(mtreelist, quart, winarray)
    if nodes:
        nh1, nh2 = nodeHeights(btreelist, quart)
    else:
        nh1 = []
        nh2 = []
    return (treelist, winarray, nh1, nh2)
예제 #6
0
def run(args):
    """Annotate the nodes of each input tree and dump the result.

    A tree is built for every newick string yielded by
    ``args.src_tree_iterator``: a ``PhyloTree`` annotated through
    ``annotate_ncbi_taxa`` when ``args.ncbi`` is set, a plain ``Tree``
    otherwise.  Every entry of ``args.feature`` is then applied to it.  An
    entry is an iterable of ``key:value`` strings with these keys:

    * ``name``: name of the feature to add (required).
    * ``source``: path of a tab-separated file whose lines read
      ``node[,node...]<TAB>value`` (required).  Empty lines and lines
      starting with ``#`` are skipped.  Several node names select their
      common ancestor.
    * ``type``: one of ``str``, ``int``, ``float``, ``set`` or ``list``,
      used to cast the value (``str`` when omitted).
    * ``multiple``: recognised, but its value is currently not used.

    Each tree is finally handed to ``dump`` together with the names of all
    features collected so far.

    Args:
        args: parsed command-line namespace providing ``src_tree_iterator``,
            ``ncbi``, ``taxid_attr`` and ``feature``.

    Raises:
        ValueError: for a malformed or unknown feature option, an unknown
            ``type``, a missing ``name`` or ``source``, or a source line that
            does not have exactly two tab-separated columns.
    """
    from ete3 import Tree, PhyloTree

    # Loop-invariant lookup table, built once instead of once per tree.
    type2cast = {
        "str": str,
        "int": int,
        "float": float,
        "set": set,
        "list": list
    }

    features = set()
    for nw in args.src_tree_iterator:
        if args.ncbi:
            tree = PhyloTree(nw)
            features.update([
                "taxid", "name", "rank", "bgcolor", "sci_name",
                "collapse_subspecies", "named_lineage", "lineage"
            ])
            tree.annotate_ncbi_taxa(args.taxid_attr)
        else:
            tree = Tree(nw)

        # `or []` tolerates a namespace in which no feature was requested.
        for annotation in (args.feature or []):
            aname, asource, acast = None, None, str
            for field in annotation:
                try:
                    key, value = list(map(str.strip, field.split(":")))
                except Exception:
                    raise ValueError("Invalid feature option [%s]" % field)

                if key == "name":
                    aname = value
                elif key == "source":
                    asource = value
                elif key == "multiple":
                    # Recognised so that it is not rejected as unknown, but
                    # the append behaviour it hints at is not implemented.
                    pass
                elif key == "type":
                    try:
                        acast = type2cast[value]
                    except KeyError:
                        raise ValueError("Invalid feature type [%s]" % field)
                else:
                    raise ValueError("Unknown feature option [%s]" % field)

            # Both fields are mandatory.  The exception has to be raised (the
            # original only built it), otherwise the failure shows up later
            # as an unrelated TypeError from open() or hasattr().
            if not aname or not asource:
                raise ValueError(
                    'name and source are required when annotating a new feature [%s]'
                    % (annotation,))

            features.add(aname)
            # Plain text mode already applies universal newlines on Python 3
            # (the 'U' flag was removed in 3.11), and the context manager
            # closes the file even when a line is rejected.
            with open(asource) as source_file:
                for line in source_file:
                    line = line.strip()
                    if not line or line.startswith('#'):
                        continue

                    columns = [col.strip() for col in line.split('\t')]
                    if len(columns) != 2:
                        raise ValueError(
                            "Invalid annotation line in [%s]: %r"
                            % (asource, line))
                    nodenames = [nm.strip() for nm in columns[0].split(',')]
                    attr_value = columns[1]

                    # A leading "!" is a grouping marker.  It is stripped so
                    # the node lookup works; no stricter grouping is
                    # implemented for it yet.
                    if nodenames[0].startswith('!'):
                        nodenames[0] = nodenames[0][1:]

                    if len(nodenames) > 1:
                        target_node = tree.get_common_ancestor(nodenames)
                    else:
                        target_node = tree & nodenames[0]

                    # NOTE(review): despite the wording of the warning, an
                    # existing attribute is left untouched here -- confirm
                    # whether it should really be overwritten.
                    if hasattr(target_node, aname):
                        log.warning('Overwriting annotation for node" [%s]"' %
                                    nodenames)
                    else:
                        target_node.add_feature(aname, acast(attr_value))

        dump(tree, features=features)
예제 #7
0
# Of course, you can disable the automatic generation of species
# names. To do so, you can set the species naming function to
# None. This is useful to set the species names manually or for
# reading them from a newick file. Otherwise, the species attribute
# would be overwritten.
mynewick = """
(((Hsa_001[&&NHX:species=Human],Ptr_001[&&NHX:species=Chimp]),
(Cfa_001[&&NHX:species=Dog],Mms_001[&&NHX:species=Mouse])),
(Dme_001[&&NHX:species=Fly],Dme_002[&&NHX:species=Fly]));
"""
t = PhyloTree(mynewick, sp_naming_function=None)
# Each print() receives one pre-formatted string, so the output is the same
# on Python 2 and Python 3.
print("Disabled mode (manual set):")
for n in t.get_leaves():
    print("node: %s Species name: %s" % (n.name, n.species))
# node: Dme_001 Species name: Fly
# node: Dme_002 Species name: Fly
# node: Hsa_001 Species name: Human
# node: Ptr_001 Species name: Chimp
# node: Cfa_001 Species name: Dog
# node: Mms_001 Species name: Mouse
#
# Of course, once this info is available you can query any internal
# node for species covered.
human_mouse_ancestor = t.get_common_ancestor("Hsa_001", "Mms_001")
print("These are the species under the common ancestor of Human & Mouse")
print('\n'.join(human_mouse_ancestor.get_species()))
# Mouse
# Chimp
# Dog
# Human
예제 #8
0
# From a Newick tree and two lists of samples, extract the first coalescent event between samples of these two groups
from ete3 import PhyloTree
from optparse import OptionParser

# Command-line interface: a newick tree file plus one sample list per group.
parser = OptionParser()
parser.add_option("-f", "--file", dest="filename",
                  help="File containing newick tree", metavar="FILE")
parser.add_option("-s", "--species1", dest="species1List",
                  help="file containing list of samples, group1, one per line",
                  metavar="FILE")
parser.add_option("-p", "--species2", dest="species2List",
                  help="file containing list of samples, group2, one per line",
                  metavar="FILE")
(options, args) = parser.parse_args()

# All three inputs are mandatory: stop with a usage message instead of
# failing later with a traceback from open(None).
if not (options.filename and options.species1List and options.species2List):
    parser.error("--file, --species1 and --species2 are all required")

# NOTE(review): format=1 is the ete3 newick flavour that keeps internal node
# names, which is what gets printed below -- confirm against the input trees.
t = PhyloTree(options.filename, format=1)

# One sample name per line.  Surrounding whitespace and blank lines are
# dropped so they are never looked up as node names.
with open(options.species1List) as f:
    liste1 = [line.strip() for line in f if line.strip()]

with open(options.species2List) as f:
    liste2 = [line.strip() for line in f if line.strip()]

# Print the name of the common ancestor of every (group1, group2) pair.
# print() with a single argument behaves the same on Python 2 and Python 3.
for s1 in liste1:
    for s2 in liste2:
        pp = t.get_common_ancestor(s1, s2)
        print(pp.name)
                    default=1000)
    args = ap.parse_args()

    og_list = []
    with open(args.outgroupf, "r") as ogf:
        for line in ogf:
            og_list.append(line.strip())

    tr = PhyloTree(args.tree,
                   sp_naming_function=lambda node: node.name.split("@")[0])

    og_in_tr = []
    for l in tr.iter_leaves():
        if l.name.split("@")[0] in og_list:
            og_in_tr.append(l.name)

    all_l = [b.name for b in tr.iter_leaves()]
    ing = list(set(all_l) - set(og_in_tr))

    tr.set_outgroup(tr.get_common_ancestor(*og_in_tr))
    tr.prune(ing, preserve_branch_length=True)
    all_l = list(set(all_l) - set(og_in_tr))

    trlen = calc_trlen(tr)

    sub_trlen = calc_sub_trlen(tr)

    resamp_dict = resample(sub_trlen, args.iterate)

    print([(k, v)
           for k, v in sorted(resamp_dict.items(), key=lambda x: x[1])][-9][0])
예제 #10
0
# Of course, you can disable the automatic generation of species
# names. To do so, you can set the species naming function to
# None. This is useful to set the species names manually or for
# reading them from a newick file. Otherwise, the species attribute
# would be overwritten.
mynewick = """
(((Hsa_001[&&NHX:species=Human],Ptr_001[&&NHX:species=Chimp]),
(Cfa_001[&&NHX:species=Dog],Mms_001[&&NHX:species=Mouse])),
(Dme_001[&&NHX:species=Fly],Dme_002[&&NHX:species=Fly]));
"""
t = PhyloTree(mynewick, sp_naming_function=None)
# Each print() receives one pre-formatted string, so the output is the same
# on Python 2 and Python 3.
print("Disabled mode (manual set):")
for n in t.get_leaves():
    print("node: %s Species name: %s" % (n.name, n.species))
# node: Dme_001 Species name: Fly
# node: Dme_002 Species name: Fly
# node: Hsa_001 Species name: Human
# node: Ptr_001 Species name: Chimp
# node: Cfa_001 Species name: Dog
# node: Mms_001 Species name: Mouse
#
# Of course, once this info is available you can query any internal
# node for species covered.
human_mouse_ancestor = t.get_common_ancestor("Hsa_001", "Mms_001")
print("These are the species under the common ancestor of Human & Mouse")
print("\n".join(human_mouse_ancestor.get_species()))
# Mouse
# Chimp
# Dog
# Human