from ete3 import PhylomeDBConnector

# This connects to the main phylomeDB server (default parameters)
p = PhylomeDBConnector()

# Obtains the phylomeDB internal ID for my gene of interest
idmatches = p.search_id("ENSG00000146556")

# Take the only match (several would be possible)
geneid = idmatches[0]

# Gets the 'geneid' tree in phylome 1 reconstructed using WAG
# evolutionary model
t, likelihood = p.get_tree(geneid, "WAG", 1)

# Single-argument call form of print: behaves the same on Python 2 and 3.
print(t)
# Sample output. NOTE: in this copy of the example the ASCII drawing is
# truncated and its column alignment has been lost; the fragments below
# are kept only as a hint of what the printed tree looks like.
#
# /-Xtr0044988
# |
# | /-Gga0000980
# | |
# /---| | /-Bta0018700
# | | | |
# | | | | /-Hsa0000001
# | | | | /---|
# | | | /---| /---| \-Hsa0010733
# | \---| | | | |
# | | | | /---| \-Hsa0010710
# | | | | | |
# | | /---| \---| \-Ptr0000001
# /---| | | | |
# | | | | | \-Cfa0016699
# | | | | |
# | | \---| | /-Rno0030248
# | | | \---|
# | | | \-Mms0024821
# /---| | |
from ete3 import PhylomeDBConnector

# This connects to the main phylomeDB server (default parameters)
p = PhylomeDBConnector()

PHYLOME_ID = 1

# How many seed sequences this demo processes before stopping.
MAX_SEEDS = 2

# This is the species code/age dictionary used to correctly root the
# tree in the human phylome. You can define your own, or use the
# midpoint outgroup method
species2age = {
    'Aga': 8, 'Ago': 9, 'Ame': 8, 'Ath': 10, 'Bta': 3, 'Cal': 9, 'Cbr': 8,
    'Cel': 8, 'Cfa': 3, 'Cgl': 9, 'Cin': 7, 'Cne': 9, 'Cre': 10, 'Ddi': 10,
    'Dha': 9, 'Dme': 8, 'Dre': 6, 'Ecu': 9, 'Fru': 6, 'Gga': 4, 'Gth': 10,
    'Gze': 9, 'Hsa': 1, 'Kla': 9, 'Lma': 10, 'Mdo': 3, 'Mms': 3, 'Mmu': 2,
    'Ncr': 9, 'Pfa': 10, 'Pte': 10, 'Ptr': 2, 'Pyo': 10, 'Rno': 3, 'Sce': 9,
    'Spb': 9, 'Tni': 6, 'Xtr': 5, 'Yli': 9,
}

# Iterates over each seed sequence in the human proteome
for i, seqid in enumerate(p.get_seed_ids(PHYLOME_ID)):
    # Just process the first MAX_SEEDS ids. The index is zero-based, so
    # the cut-off must be ">=" (a "> 2" test would let three ids through).
    if i >= MAX_SEEDS:
        break

    winner_model, lks, t = p.get_best_tree(seqid, PHYLOME_ID)

    # Only if the tree was successfully reconstructed (and contains the
    # seed sequence) do we run the species overlap algorithm
    if not t or seqid not in t:
        continue

    # Returned outgroup is used to root the tree
    outgroup = t.get_farthest_oldest_leaf(species2age)
    t.set_outgroup(outgroup)

    # Finds the node representing the seed sequence.
    # We want the orthology relationships of such sequence.
    seed_node = t.search_nodes(name=seqid)[0]
    evol_events = seed_node.get_my_evol_events()
    for ev in evol_events:
        # Speciation event
        if ev.etype == "S":
            inparalogs = [n for n in ev.in_seqs if n.startswith("Hsa")]
            # One pre-formatted string keeps the output identical on
            # Python 2 and Python 3.
            print('ORTHOLOGY RELATIONSHIP: {0} <===> {1}'.format(
                ','.join(inparalogs), ','.join(ev.out_seqs)))
from ete3 import PhyloTree, PhylomeDBConnector, SeqGroup

# Connect to phylomeDB with the default parameters
p = PhylomeDBConnector()

# Fetch the best tree and the clean alignment for the same seed
# sequence in phylome 1. Only the tree and the alignment text are used
# below; the remaining returned values are ignored.
winner_model, lks, t = p.get_best_tree("Hsa0000001", 1)
alg, _ = p.get_clean_alg("Hsa0000001", 1)

# Parse the alignment ("iphylip" format) and keep only the first 30
# columns of every sequence before attaching it to the tree
A = SeqGroup(alg, "iphylip")
for s in A.id2seq:
    A.id2seq[s] = A.id2seq[s][:30]
t.link_to_alignment(A)

# Single-argument print calls behave the same on Python 2 and 3
print(t.get_species())
print(t)

# Root the gene tree on the "Ddi0002240" node
t.set_outgroup(t & "Ddi0002240")

# Species tree used for the reconciliation. Sibling subtrees must be
# separated by commas in Newick, so a comma is required between the
# "...,Cin)" clade and "(Dme, Aga)".
# NOTE(review): the doubled parentheses closing after "(Dre, Fru)" create
# a single-child internal node; kept as in the original - confirm intended.
sp = PhyloTree(
    "(((((((((((Hsa, Ptr), Mmu), ((Mms, Rno), (Bta, Cfa))), Mdo), Gga), "
    "Xtr), (Dre, Fru))),Cin), (Dme, Aga)), Ddi);"
)

# Reconcile the gene tree with the species tree, then print and display
# the reconciled tree
reconciled, evs = t.reconcile(sp)
print(reconciled)
reconciled.show()
from ete3 import PhylomeDBConnector

# With no arguments the connector uses its default parameters, i.e. the
# main phylomeDB server.
p = PhylomeDBConnector()

# To work against a local version of phylomeDB instead, pass the
# connection details (including the user and password arguments)
# explicitly.
p = PhylomeDBConnector(
    host="localhost",
    user="******",
    passwd="public",
    port=3306,
)
from ete3 import PhyloTree, PhylomeDBConnector, SeqGroup

# Default connection to phylomeDB
p = PhylomeDBConnector()

# Best tree and clean alignment for seed "Hsa0000001" in phylome 1.
# Only the tree (third value) and the alignment text (first value) are
# used by this example.
_model, _lks, t = p.get_best_tree("Hsa0000001", 1)
alg_text, _ = p.get_clean_alg("Hsa0000001", 1)

# Load the alignment ("iphylip" format), trim every sequence to its
# first 30 columns, and link it to the tree
A = SeqGroup(alg_text, "iphylip")
for s in A.id2seq:
    A.id2seq[s] = A.id2seq[s][:30]
t.link_to_alignment(A)

# print() with one argument gives the same output on Python 2 and 3
print(t.get_species())
print(t)

# Use the "Ddi0002240" node as outgroup
t.set_outgroup(t & "Ddi0002240")

# Species tree to reconcile against. The comma after "...,Cin)" is
# needed: Newick separates sibling subtrees with commas, and without it
# "(Dme, Aga)" is not a well-formed sibling clade.
# NOTE(review): the extra pair of parentheses closing after "(Dre, Fru)"
# yields a one-child internal node; left untouched - confirm intended.
sp = PhyloTree(
    "(((((((((((Hsa, Ptr), Mmu), ((Mms, Rno), (Bta, Cfa))), Mdo), Gga), "
    "Xtr), (Dre, Fru))),Cin), (Dme, Aga)), Ddi);"
)

# Run the reconciliation, print the resulting tree and open the viewer
reconciled, evs = t.reconcile(sp)
print(reconciled)
reconciled.show()
from ete3 import PhylomeDBConnector

# This connects to the main phylomeDB server (default parameters)
p = PhylomeDBConnector()

# Obtains a list of available phylomes
phylomes_list = p.get_phylomes()

# Obtains the list of proteomes used in phylome 1 (the human phylome).
# Stored under its own name so the phylome list above is not overwritten.
proteomes_list = p.get_proteomes_in_phylome(1)

# All seeds (potentially, trees) in the human phylome
all_seed_sequences = p.get_seed_ids(1)

# Gets the species info associated with the "Hsa" code
print(p.get_species_info("Hsa"))
# {'code': 'Hsa', 'taxid': 9606L, 'name': 'Homo_sapiens'}
# (sample output as printed under Python 2, hence the "L" suffix)
#
# You can also use the same method to find the species code given an
# NCBI taxid
print(p.get_species_info(9606))
# {'code': 'Hsa', 'taxid': 9606L, 'name': 'Homo_sapiens'}
#
# Get phylomeDB IDs matching a given Ensembl ID. Always
# returns the code of the longest isoform.
idmatches = p.search_id("ENSG00000146556")

# You can also use the search_id method to find the longest isoform of
# the gene associated to a given phylomeID. Note that phylomeDB trees
# are always reconstructed using the longest isoform associated to a
# gene.
#
print(p.search_id("Hsa0000125"))
# ['Hsa0000122']