## From a Newick tree and a list of samples, find the TMRCA (most recent
## common ancestor) of these samples and print the name of that node.
from optparse import OptionParser

from ete3 import PhyloTree

parser = OptionParser()
parser.add_option("-f", "--file", dest="filename",
                  help="File containing newick tree", metavar="FILE")
parser.add_option("-s", "--species", dest="speciesList",
                  help="file containing list of wanted species, one per line",
                  metavar="FILE")
(options, args) = parser.parse_args()

# Both options are needed; report a usage error instead of a traceback.
if not options.filename or not options.speciesList:
    parser.error("both --file and --species are required")

# format=1 is the ete3 newick flavour that reads internal node names, which
# is what makes the printed ancestor name meaningful.
t = PhyloTree(options.filename, format=1)

with open(options.speciesList) as f:
    # Strip surrounding whitespace and drop blank lines so that an empty
    # trailing line is not looked up as a node called "".
    liste = [name.strip() for name in f if name.strip()]

if not liste:
    parser.error("no sample names found in %s" % options.speciesList)

if len(liste) > 1:
    pp = t.get_common_ancestor(liste)
else:
    # Given a single target, get_common_ancestor() pairs it with the node it
    # is called on (the root here) and would always answer with the root.
    # The common ancestor of one sample is the sample itself.
    pp = t & liste[0]

# Single-argument print() behaves identically on Python 2 and Python 3.
print(pp.name)
# \--------|
#           \-Mmu_002

# Create a dictionary with relative ages for the species present in
# the phylogenetic tree. Note that ages are only relative numbers to
# define which species are older, and that different species can
# belong to the same age.
species2age = {
    "Hsa": 1,  # Homo sapiens (Hominids)
    "Ptr": 2,  # P. troglodytes (primates)
    "Mmu": 2,  # Macaca mulatta (primates)
    "Mms": 3,  # Mus musculus (mammals)
    "Cfa": 3,  # Canis familiaris (mammals)
    "Dme": 4,  # Drosophila melanogaster (metazoa)
}

# We can translate each relative age to the name of its taxonomic group
age2name = {1: "hominids", 2: "primates", 3: "mammals", 4: "metazoa"}

event1 = t.get_common_ancestor("Hsa_001", "Hsa_004")
event2 = t.get_common_ancestor("Hsa_001", "Hsa_002")

# print("") emits the blank line on both Python 2 and Python 3; a bare
# `print` is a silent no-op expression on Python 3.
print("")

# The two parts are joined with a single space, exactly as the Python 2
# statement `print a, b` separated them, so the output text is unchanged.
print(" ".join((
    "The duplication event leading to the human sequences Hsa_001 and "
    + "Hsa_004 is dated at: ",
    age2name[event1.get_age(species2age)],
)))
print(" ".join((
    "The duplication event leading to the human sequences Hsa_001 and "
    + "Hsa_002 is dated at: ",
    age2name[event2.get_age(species2age)],
)))

# The duplication event leading to the human sequences Hsa_001 and Hsa_004
# is dated at: primates
#
# The duplication event leading to the human sequences Hsa_001 and Hsa_002
# is dated at: mammals
help=("print the extended newick format for provided tree using"
      " ASCII representation and all its evolutionary events"
      " before orthoXML export"))

args = parser.parse_args()
newick = args.tree[0]

# Module-level settings copied from the command line.
# NOTE(review): presumably consumed by extract_spname, which is defined
# outside this excerpt — confirm.
SPECIES_NAME_POS = args.species_field
SPECIES_NAME_DELIMITER = args.species_delimiter

# load a phylomeDB Tree provided as a newick file in the command line
t = PhyloTree(newick, sp_naming_function=extract_spname)

if args.root:
    # Several names: root on their common ancestor.
    # A single name: root on that node itself.
    if len(args.root) > 1:
        outgroup = t.get_common_ancestor(args.root)
    else:
        outgroup = t & args.root[0]
    t.set_outgroup(outgroup)

if not args.skip_ortholog_detection:
    # detect speciation and duplication events using the species overlap
    # algorithm used in phylomeDB
    t.get_descendant_evol_events()

if args.ascii:
    # Show the tree with the event-type attribute and the node name on
    # every node, internal ones included.
    print(
        t.get_ascii(attributes=[args.evoltype_attr, "name"], show_internal=True))

if args.newick:
File.write("\n".join(HomologySummary)+"\n")

# Build one "event(<species id>,<type>)" line per loss / duplication node
# of the reconciled tree, tagging every node on the way.
EventSummary = []
i=0
for n in recon_tree.traverse("postorder"):
    # ND: running index of the node in postorder.
    n.ND = i
    if n.is_leaf():
        # Leaves take the species id registered for their species.
        n.S = sp_dict[n.species]
    if "evoltype" in dir(n):
        n.Ev = n.evoltype
        if n.evoltype == "L":
            EventSummary.append("event(%i,loss)" %(n.S))
        elif n.evoltype == "D":
            # A duplication is assigned to the species-tree node that is
            # the common ancestor of all species found below it.
            sp_dup = n.get_species()
            oldest_sp = sptree.get_common_ancestor(sp_dup)
            n.S = oldest_sp.S
            logger.debug("sp_dup: %s ",sp_dup)
            EventSummary.append("event(%i,duplication)" %(n.S))
    else:
        # NOTE(review): the original indentation was lost; this `else` is
        # assumed to pair with the `"evoltype" in dir(n)` test (nodes with
        # no recorded event type are tagged "S") — confirm.
        n.Ev = "S"
    logger.debug("name: %s",n.name)
    logger.debug("S: %s",n.S)
    logger.debug("Ev: %s",n.Ev)
    logger.debug("ND: %s",n.ND)
    i+=1

# One event per line, written to "<OutPrefixName>.events.txt".
EventsFile = OutPrefixName + ".events.txt"
with open(EventsFile,"w") as File:
    File.write("\n".join(EventSummary)+"\n")
lambda node: node.name.split("_")[0]) # n.species, n.name t.set_outgroup(t & outgroup) #taxon1 = ["parensis", "longipalpusC", "vaneedeni"] #taxon2 = ["funestus", "funestuscf", "vaneedeni"] taxon = [["parensis", "longipalpusC", "vaneedeni"], ["funestus", "funestuscf", "vaneedeni"]] taxdict = {} for i, tax in enumerate(taxon): nodesupport = [] nodeage = [] for t in treelist: if t.check_monophyly(values=tax, target_attr="species"): samples = [] for sp in tax: samples.extend(t.search_nodes(species=sp)) ancnode = t.get_common_ancestor(samples) nodeage.append(ancnode.dist) nodesupport.append(ancnode.support) taxdict[i] = (nodeage, nodesupport) if not winlist: winarray = np.ones(len(treelist), dtype=bool) mtreelist, winarray = getMonophyletic(treelist, quart, winarray) btreelist, winarray = supportFilt(mtreelist, quart, winarray) if nodes: nh1, nh2 = nodeHeights(btreelist, quart) else: nh1 = [] nh2 = [] return (treelist, winarray, nh1, nh2)
def run(args):
    """Annotate every source tree and dump it with the annotated features.

    For each newick yielded by ``args.src_tree_iterator`` the tree is loaded
    as a ``PhyloTree`` and annotated through ``annotate_ncbi_taxa`` (using
    ``args.taxid_attr``) when ``args.ncbi`` is set, or as a plain ``Tree``
    otherwise. Every custom feature described in ``args.feature`` is then
    attached to its target nodes and the tree is handed to ``dump`` together
    with the names of all annotated features.

    Each entry of ``args.feature`` is an iterable of ``key:value`` fields.
    Recognised keys are ``name``, ``source``, ``multiple`` and ``type``
    (one of ``str``, ``int``, ``float``, ``set``, ``list``; default ``str``).
    ``source`` names a file with one TAB-separated ``node names / value``
    record per line. Blank lines and lines starting with ``#`` are skipped.
    Several comma-separated node names designate their common ancestor.

    Raises:
        ValueError: if a field is malformed, its key or type is unknown, or
            a feature lacks its ``name`` or ``source``.
    """
    from ete3 import Tree, PhyloTree

    # Loop-invariant lookup table: feature "type" option -> cast function.
    type2cast = {
        "str": str,
        "int": int,
        "float": float,
        "set": set,
        "list": list,
    }

    features = set()
    for nw in args.src_tree_iterator:
        if args.ncbi:
            tree = PhyloTree(nw)
            features.update([
                "taxid", "name", "rank", "bgcolor", "sci_name",
                "collapse_subspecies", "named_lineage", "lineage"
            ])
            tree.annotate_ncbi_taxa(args.taxid_attr)
        else:
            tree = Tree(nw)

        for annotation in args.feature:
            # "multiple" is parsed and accepted but not used further here.
            aname, asource, amultiple, acast = None, None, False, str
            for field in annotation:
                try:
                    key, value = list(map(str.strip, field.split(":")))
                except Exception:
                    raise ValueError("Invalid feature option [%s]" % field)

                if key == "name":
                    aname = value
                elif key == "source":
                    asource = value
                elif key == "multiple":
                    # append
                    amultiple = value
                elif key == "type":
                    try:
                        acast = type2cast[value]
                    except KeyError:
                        raise ValueError("Invalid feature type [%s]" % field)
                else:
                    raise ValueError("Unknown feature option [%s]" % field)

            # Both fields are mandatory. The original built this exception
            # without raising it and only tested for *both* being absent, so
            # a missing field surfaced later as an unrelated TypeError.
            if not aname or not asource:
                raise ValueError(
                    'name and source are required when annotating a new feature [%s]'
                    % (annotation,))

            features.add(aname)
            # Default text mode replaces 'rU': the 'U' flag was removed in
            # Python 3.11, and every line is stripped anyway. The context
            # manager closes the file even if a record is malformed.
            with open(asource) as source_file:
                for line in source_file:
                    line = line.strip()
                    if not line or line.startswith('#'):
                        continue
                    nodenames, attr_value = list(
                        map(str.strip, line.split('\t')))
                    nodenames = list(map(str.strip, nodenames.split(',')))

                    # A leading "!" switches off relaxed grouping; the flag
                    # has no effect yet (placeholder kept from the original).
                    relaxed_grouping = True
                    if nodenames[0].startswith('!'):
                        relaxed_grouping = False
                        nodenames[0] = nodenames[0][1:]

                    if len(nodenames) > 1:
                        target_node = tree.get_common_ancestor(nodenames)
                        if not relaxed_grouping:
                            pass  # do something
                    else:
                        target_node = tree & nodenames[0]

                    if hasattr(target_node, aname):
                        # NOTE(review): the message says "Overwriting" but
                        # the existing value is left untouched — confirm
                        # which behaviour is intended.
                        log.warning(
                            'Overwriting annotation for node" [%s]"' % nodenames)
                    else:
                        target_node.add_feature(aname, acast(attr_value))

        dump(tree, features=features)
# Of course, you can disable the automatic generation of species
# names. To do so, you can set the species naming function to
# None. This is useful to set the species names manually or for
# reading them from a newick file. Otherwise, the species attribute would
# be overwritten.
mynewick = """
(((Hsa_001[&&NHX:species=Human],Ptr_001[&&NHX:species=Chimp]),
(Cfa_001[&&NHX:species=Dog],Mms_001[&&NHX:species=Mouse])),
(Dme_001[&&NHX:species=Fly],Dme_002[&&NHX:species=Fly]));
"""
t = PhyloTree(mynewick, sp_naming_function=None)

# Single-argument print() calls give the same output on Python 2 and 3;
# the Python 2 print statement is a SyntaxError on Python 3.
print("Disabled mode (manual set):")
for n in t.get_leaves():
    print("node: %s Species name: %s" % (n.name, n.species))
# node: Dme_001 Species name: Fly
# node: Dme_002 Species name: Fly
# node: Hsa_001 Species name: Human
# node: Ptr_001 Species name: Chimp
# node: Cfa_001 Species name: Dog
# node: Mms_001 Species name: Mouse
#
# Of course, once this info is available you can query any internal
# node for species covered.
human_mouse_ancestor = t.get_common_ancestor("Hsa_001", "Mms_001")
print("These are the species under the common ancestor of Human & Mouse")
print('\n'.join(human_mouse_ancestor.get_species()))
# Mouse
# Chimp
# Dog
# Human
# From a Newick tree and two lists of samples, extract the coalescent events
# between samples of these two groups: for every pair made of one sample from
# each group, print the name of the pair's most recent common ancestor.
from optparse import OptionParser

from ete3 import PhyloTree


def _read_names(path):
    """Return the non-blank lines of *path*, stripped of whitespace."""
    with open(path) as handle:
        return [line.strip() for line in handle if line.strip()]


parser = OptionParser()
parser.add_option("-f", "--file", dest="filename",
                  help="File containing newick tree", metavar="FILE")
parser.add_option("-s", "--species1", dest="species1List",
                  help="file containing list of samples, group1, one per line",
                  metavar="FILE")
parser.add_option("-p", "--species2", dest="species2List",
                  help="file containing list of samples, group2, one per line",
                  metavar="FILE")
(options, args) = parser.parse_args()

# All three options are needed; report a usage error instead of a traceback.
if not (options.filename and options.species1List and options.species2List):
    parser.error("--file, --species1 and --species2 are all required")

# format=1 is the ete3 newick flavour that reads internal node names, which
# is what makes the printed ancestor names meaningful.
t = PhyloTree(options.filename, format=1)

# Blank or whitespace-padded lines are dropped/cleaned so they are not
# looked up as node names.
liste1 = _read_names(options.species1List)
liste2 = _read_names(options.species2List)

for s1 in liste1:
    for s2 in liste2:
        pp = t.get_common_ancestor(s1, s2)
        # Single-argument print() behaves identically on Python 2 and 3.
        print(pp.name)
default=1000) args = ap.parse_args() og_list = [] with open(args.outgroupf, "r") as ogf: for line in ogf: og_list.append(line.strip()) tr = PhyloTree(args.tree, sp_naming_function=lambda node: node.name.split("@")[0]) og_in_tr = [] for l in tr.iter_leaves(): if l.name.split("@")[0] in og_list: og_in_tr.append(l.name) all_l = [b.name for b in tr.iter_leaves()] ing = list(set(all_l) - set(og_in_tr)) tr.set_outgroup(tr.get_common_ancestor(*og_in_tr)) tr.prune(ing, preserve_branch_length=True) all_l = list(set(all_l) - set(og_in_tr)) trlen = calc_trlen(tr) sub_trlen = calc_sub_trlen(tr) resamp_dict = resample(sub_trlen, args.iterate) print([(k, v) for k, v in sorted(resamp_dict.items(), key=lambda x: x[1])][-9][0])
# Of course, you can disable the automatic generation of species
# names. To do so, you can set the species naming function to
# None. This is useful to set the species names manually or for
# reading them from a newick file. Otherwise, the species attribute would
# be overwritten.
mynewick = """
(((Hsa_001[&&NHX:species=Human],Ptr_001[&&NHX:species=Chimp]),
(Cfa_001[&&NHX:species=Dog],Mms_001[&&NHX:species=Mouse])),
(Dme_001[&&NHX:species=Fly],Dme_002[&&NHX:species=Fly]));
"""
t = PhyloTree(mynewick, sp_naming_function=None)

# Single-argument print() calls give the same output on Python 2 and 3;
# the Python 2 print statement is a SyntaxError on Python 3.
print("Disabled mode (manual set):")
for n in t.get_leaves():
    print("node: %s Species name: %s" % (n.name, n.species))
# node: Dme_001 Species name: Fly
# node: Dme_002 Species name: Fly
# node: Hsa_001 Species name: Human
# node: Ptr_001 Species name: Chimp
# node: Cfa_001 Species name: Dog
# node: Mms_001 Species name: Mouse
#
# Of course, once this info is available you can query any internal
# node for species covered.
human_mouse_ancestor = t.get_common_ancestor("Hsa_001", "Mms_001")
print("These are the species under the common ancestor of Human & Mouse")
print("\n".join(human_mouse_ancestor.get_species()))
# Mouse
# Chimp
# Dog
# Human