def test():
    """Exercise the claim helpers against a tiny three-taxon taxonomy.

    Builds a fresh taxonomy containing Mouse, Dog and Mammal, applies
    ``new_claims`` with ``make_claims``, checks ``expectations`` and then
    ``new_claims`` with ``test_claims``, and finally hands ``surprises``
    to ``find_surprises``.

    Returns the combined result of the two ``test_claims`` calls, so a
    failure in either list is reflected in the return value.
    """
    tax = Taxonomy.newTaxonomy()
    tax.newTaxon('Mouse', 'species', 'about:blank')
    tax.newTaxon('Dog', 'species', 'about:blank')
    tax.newTaxon('Mammal', 'class', 'about:blank')

    new_claims = [
        Has_child('Mammal', 'Mouse', 'about:blank'),
        Whether_same('Mouse', 'Mus', True),
        Whether_same('Muus', 'Mouse', True),
    ]
    expectations = [
        Has_child('Mammal', 'Mouse'),
        Has_child('Mammal', With_ancestor('Mouse', 'Mammal')),
        Has_child(With_descendant('Mammal', 'Mouse'), 'Mouse'),
        Whether_same('Mammal', 'Mammal', True),
        Whether_same('Mammal', 'Meemmal', False),
        Whether_same('Mouse', 'Mus', True),
        Whether_same('Mus', 'Mouse', True),
        Whether_same('Mus', 'Horse', False),
    ]
    surprises = [
        Has_child('Mouse', 'Mammal'),
        Has_child('Mammal', 'Dog'),
        # fails
        Whether_same('Mammal', 'Meemmal', True),
        Whether_same('Mammal', 'Mammal', False),
    ]

    make_claims(tax, new_claims)

    # Both checks always run (same call order as before).  Previously the
    # first result was overwritten by the second, so a failure on
    # `expectations` was silently dropped from the return value.
    expectations_ok = test_claims(tax, expectations)
    claims_ok = test_claims(tax, new_claims)

    find_surprises(tax, surprises)
    return expectations_ok and claims_ok
def load_fung():
    """Load the taxonomy stored in 'tax/fung/' (tag 'if') and clean it up.

    Steps, in order: analyze major rank conflicts, prune the two bad
    Fungi homonyms (90154, 90155) when they are present, run
    ``fix_basal`` and ``patch_fung``, then smush.

    Returns the resulting taxonomy.
    """
    fung = Taxonomy.getTaxonomy('tax/fung/', 'if')
    fung.analyzeMajorRankConflicts()

    # 2014-04-14 Bad Fungi homonyms in new version of IF.  90156 is the good one.
    # 90154 has no descendants
    if fung.maybeTaxon('90154') is not None:
        print('Removing Fungi 90154')
        fung.taxon('90154').prune(this_source)
    # 90155 is "Nom. inval." and has no descendants
    if fung.maybeTaxon('90155') is not None:
        print('Removing Fungi 90155')
        fung.taxon('90155').prune(this_source)

    fix_basal(fung)

    # Exceptions from patch_fung propagate to the caller.  The former
    # `if True: ... else: try/except` wrapper only ever took the first
    # branch, so the unreachable exception-swallowing branch is gone.
    patch_fung(fung)

    # smush folds sibling taxa that have the same name.
    fung.smush()

    return fung
def load_fung():
    """Load the taxonomy stored in 'tax/fung/' (tag 'if') and clean it up.

    Steps, in order: analyze major rank conflicts, prune the two bad
    Fungi homonyms (90154, 90155) when they are present, run
    ``fix_basal`` and ``patch_fung``, then smush.

    Returns the resulting taxonomy.
    """
    fung = Taxonomy.getTaxonomy('tax/fung/', 'if')
    fung.analyzeMajorRankConflicts()

    # 2014-04-14 Bad Fungi homonyms in new version of IF.  90156 is the good one.
    # 90154 has no descendants
    if fung.maybeTaxon('90154') is not None:
        print('Removing Fungi 90154')
        fung.taxon('90154').prune(this_source)
    # 90155 is "Nom. inval." and has no descendants
    if fung.maybeTaxon('90155') is not None:
        print('Removing Fungi 90155')
        fung.taxon('90155').prune(this_source)

    fix_basal(fung)

    # Exceptions from patch_fung propagate to the caller.  The former
    # `if True: ... else: try/except` wrapper only ever took the first
    # branch, so the unreachable exception-swallowing branch is gone.
    patch_fung(fung)

    # smush folds sibling taxa that have the same name.
    fung.smush()

    return fung
def test():
    """Exercise the claim helpers against a tiny three-taxon taxonomy.

    Builds a fresh taxonomy containing Mouse, Dog and Mammal, applies
    ``new_claims`` with ``make_claims``, checks ``expectations`` and then
    ``new_claims`` with ``test_claims``, and finally hands ``surprises``
    to ``find_surprises``.

    Returns the combined result of the two ``test_claims`` calls, so a
    failure in either list is reflected in the return value.
    """
    tax = Taxonomy.newTaxonomy()
    tax.newTaxon('Mouse', 'species', 'about:blank')
    tax.newTaxon('Dog', 'species', 'about:blank')
    tax.newTaxon('Mammal', 'class', 'about:blank')

    new_claims = [
        Has_child('Mammal', 'Mouse', 'about:blank'),
        Whether_same('Mouse', 'Mus', True),
        Whether_same('Muus', 'Mouse', True),
    ]
    expectations = [
        Has_child('Mammal', 'Mouse'),
        Has_child('Mammal', With_ancestor('Mouse', 'Mammal')),
        Has_child(With_descendant('Mammal', 'Mouse'), 'Mouse'),
        Whether_same('Mammal', 'Mammal', True),
        Whether_same('Mammal', 'Meemmal', False),
        Whether_same('Mouse', 'Mus', True),
        Whether_same('Mus', 'Mouse', True),
        Whether_same('Mus', 'Horse', False),
    ]
    surprises = [
        Has_child('Mouse', 'Mammal'),
        Has_child('Mammal', 'Dog'),
        # fails
        Whether_same('Mammal', 'Meemmal', True),
        Whether_same('Mammal', 'Mammal', False),
    ]

    make_claims(tax, new_claims)

    # Both checks always run (same call order as before).  Previously the
    # first result was overwritten by the second, so a failure on
    # `expectations` was silently dropped from the return value.
    expectations_ok = test_claims(tax, expectations)
    claims_ok = test_claims(tax, new_claims)

    find_surprises(tax, surprises)
    return expectations_ok and claims_ok
def loadSilva():
    """Load the taxonomy in 'tax/silva/' (tag 'silva'), rename
    Rhodophyceae to Rhodophyta, and return it."""
    taxonomy = Taxonomy.getTaxonomy('tax/silva/', 'silva')

    # JAR 2014-05-13 scrutinizing pin() and BarrierNodes.  Wikipedia
    # confirms this synonymy.  Dail L. prefers -phyta to -phyceae
    # but says -phytina would be more correct per code.
    red_algae = taxonomy.taxon('Rhodophyceae')
    red_algae.rename('Rhodophyta')

    return taxonomy
def loadIrmng():
    """Load the taxonomy in 'tax/irmng/' (tag 'irmng'), smush it, apply
    the protist and plant fixes, add the Metazoa synonym, and return it."""
    taxonomy = Taxonomy.getTaxonomy('tax/irmng/', 'irmng')
    taxonomy.smush()

    # Same fixes, same order as always: protists first, then plants.
    for apply_fix in (fixProtists, fixPlants):
        apply_fix(taxonomy)

    animals = taxonomy.taxon('Animalia')
    animals.synonym('Metazoa')
    return taxonomy
def load_silva():
    """Load the taxonomy in 'tax/silva/' (tag 'silva'), rename two
    accessions, run ``patch_silva`` on it, and return it."""
    taxonomy = Taxonomy.getTaxonomy('tax/silva/', 'silva')

    # Used in studies pg_2448,pg_2783,pg_2753, seen deprecated on 2015-07-20
    renames = [
        ('AF364847', 'Pantoea ananatis LMG 20103'),  # ncbi:706191
        ('EF690403', 'Pantoea ananatis B1-9'),       # ncbi:1048262
    ]
    for accession, new_name in renames:
        taxonomy.taxon(accession).rename(new_name)

    patch_silva(taxonomy)
    return taxonomy
def load_silva():
    """Load the taxonomy in 'tax/silva/' (tag 'silva'), rename two
    accessions, run ``patch_silva`` on it, and return it."""
    taxonomy = Taxonomy.getTaxonomy('tax/silva/', 'silva')

    # Used in studies pg_2448,pg_2783,pg_2753, seen deprecated on 2015-07-20
    renames = [
        ('AF364847', 'Pantoea ananatis LMG 20103'),  # ncbi:706191
        ('EF690403', 'Pantoea ananatis B1-9'),       # ncbi:1048262
    ]
    for accession, new_name in renames:
        taxonomy.taxon(accession).rename(new_name)

    patch_silva(taxonomy)
    return taxonomy
def loadGbif():
    """Load the taxonomy in 'tax/gbif/' (tag 'gbif'), smush it, apply the
    protist and plant fixes, add the Metazoa synonym, merge the duplicated
    Helophorus entry, and return it."""
    taxonomy = Taxonomy.getTaxonomy('tax/gbif/', 'gbif')
    taxonomy.smush()

    fixProtists(taxonomy)  # creates a Eukaryota node
    fixPlants(taxonomy)

    animals = taxonomy.taxon('Animalia')
    animals.synonym('Metazoa')

    # JAR 2014-07-18 - get rid of Helophorus duplication
    keeper = taxonomy.taxon('3263442')
    duplicate = taxonomy.taxon('6757656')
    keeper.absorb(duplicate)

    return taxonomy
def load_gbif():
    """Load the taxonomy in 'tax/gbif/' (tag 'gbif'), smush it, flag major
    rank conflicts, run ``fix_basal``, add the Metazoa synonym, run
    ``patch_gbif``, and return it."""
    taxonomy = Taxonomy.getTaxonomy('tax/gbif/', 'gbif')
    taxonomy.smush()

    # In GBIF, if a rank is skipped for some children but not others, that
    # means the rank-skipped children are incertae sedis.  Mark them so.
    taxonomy.analyzeMajorRankConflicts()

    fix_basal(taxonomy)  # creates a Eukaryota node

    animals = taxonomy.taxon('Animalia')
    animals.synonym('Metazoa')

    patch_gbif(taxonomy)
    return taxonomy
def load_gbif():
    """Load the taxonomy in 'tax/gbif/' (tag 'gbif'), smush it, flag major
    rank conflicts, run ``fix_basal``, add the Metazoa synonym, run
    ``patch_gbif``, and return it."""
    taxonomy = Taxonomy.getTaxonomy('tax/gbif/', 'gbif')
    taxonomy.smush()

    # In GBIF, if a rank is skipped for some children but not others, that
    # means the rank-skipped children are incertae sedis.  Mark them so.
    taxonomy.analyzeMajorRankConflicts()

    fix_basal(taxonomy)  # creates a Eukaryota node

    animals = taxonomy.taxon('Animalia')
    animals.synonym('Metazoa')

    patch_gbif(taxonomy)
    return taxonomy
def load_ncbi():
    """Load the taxonomy in 'tax/ncbi/' (tag 'ncbi'), run ``fix_SAR``,
    rename Viridiplantae to Chloroplastida, run ``patch_ncbi``, run the
    OTU and container analyses, and return it."""
    taxonomy = Taxonomy.getTaxonomy('tax/ncbi/', 'ncbi')

    fix_SAR(taxonomy)

    green_plants = taxonomy.taxon('Viridiplantae')
    green_plants.rename('Chloroplastida')

    patch_ncbi(taxonomy)

    # analyzeOTUs sets flags on questionable taxa ("unclassified",
    # hybrids, and so on) to allow the option of suppression downstream
    taxonomy.analyzeOTUs()
    taxonomy.analyzeContainers()

    return taxonomy
def loadH2007():
    """Load the Newick tree 'feed/h2007/tree.tre' (tag 'h2007') and fix
    two misspelled names.

    Returns the resulting taxonomy.
    """
    h2007 = Taxonomy.getNewick('feed/h2007/tree.tre', 'h2007')

    # 2014-04-08 Misspelling
    if h2007.maybeTaxon('Chaetothryriomycetidae') is not None:
        h2007.taxon('Chaetothryriomycetidae').rename('Chaetothyriomycetidae')

    # Rename the misspelled taxon when it is present; otherwise record
    # the misspelling as a synonym of the correctly spelled taxon.
    if h2007.maybeTaxon('Asteriniales') is not None:
        h2007.taxon('Asteriniales').rename('Asterinales')
    else:
        h2007.taxon('Asterinales').synonym('Asteriniales')

    return h2007
def load_ncbi():
    """Load the taxonomy in 'tax/ncbi/' (tag 'ncbi'), run ``fix_SAR``,
    rename Viridiplantae to Chloroplastida, run ``patch_ncbi``, run the
    OTU and container analyses, and return it."""
    taxonomy = Taxonomy.getTaxonomy('tax/ncbi/', 'ncbi')

    fix_SAR(taxonomy)

    green_plants = taxonomy.taxon('Viridiplantae')
    green_plants.rename('Chloroplastida')

    patch_ncbi(taxonomy)

    # analyzeOTUs sets flags on questionable taxa ("unclassified",
    # hybrids, and so on) to allow the option of suppression downstream
    taxonomy.analyzeOTUs()
    taxonomy.analyzeContainers()

    return taxonomy
def load_h2007():
    """Load the Newick tree 'feed/h2007/tree.tre' (tag 'h2007'), fix two
    misspelled names, and add the Urocystidales synonym.

    Returns the resulting taxonomy.
    """
    h2007 = Taxonomy.getNewick('feed/h2007/tree.tre', 'h2007')

    # 2014-04-08 Misspelling
    if h2007.maybeTaxon('Chaetothryriomycetidae') is not None:
        h2007.taxon('Chaetothryriomycetidae').rename('Chaetothyriomycetidae')

    # Rename the misspelled taxon when it is present; otherwise record
    # the misspelling as a synonym of the correctly spelled taxon.
    if h2007.maybeTaxon('Asteriniales') is not None:
        h2007.taxon('Asteriniales').rename('Asterinales')
    else:
        h2007.taxon('Asterinales').synonym('Asteriniales')

    # h2007/if synonym https://github.com/OpenTreeOfLife/reference-taxonomy/issues/40
    h2007.taxon('Urocystales').synonym('Urocystidales')

    return h2007
def load_h2007():
    """Load the Newick tree 'feed/h2007/tree.tre' (tag 'h2007'), fix two
    misspelled names, and add the Urocystidales synonym.

    Returns the resulting taxonomy.
    """
    h2007 = Taxonomy.getNewick('feed/h2007/tree.tre', 'h2007')

    # 2014-04-08 Misspelling
    if h2007.maybeTaxon('Chaetothryriomycetidae') is not None:
        h2007.taxon('Chaetothryriomycetidae').rename('Chaetothyriomycetidae')

    # Rename the misspelled taxon when it is present; otherwise record
    # the misspelling as a synonym of the correctly spelled taxon.
    if h2007.maybeTaxon('Asteriniales') is not None:
        h2007.taxon('Asteriniales').rename('Asterinales')
    else:
        h2007.taxon('Asterinales').synonym('Asteriniales')

    # h2007/if synonym https://github.com/OpenTreeOfLife/reference-taxonomy/issues/40
    h2007.taxon('Urocystales').synonym('Urocystidales')

    return h2007
def loadFung():
    """Load the taxonomy in 'tax/if/' (tag 'if'), prune the two bad Fungi
    homonyms (90154, 90155) when present, apply the protist fix, smush.

    Returns the resulting taxonomy.
    """
    fung = Taxonomy.getTaxonomy('tax/if/', 'if')

    # 2014-04-14 Bad Fungi homonyms in new version of IF.  90156 is the good one.
    # 90154 has no descendants
    if fung.maybeTaxon('90154') is not None:
        print('Removing Fungi 90154')
        fung.taxon('90154').prune()
    # 90155 is "Nom. inval." and has no descendants
    if fung.maybeTaxon('90155') is not None:
        print('Removing Fungi 90155')
        fung.taxon('90155').prune()

    fixProtists(fung)

    # smush folds sibling taxa that have the same name.
    fung.smush()

    return fung
def load_worms():
    """Load the taxonomy in 'tax/worms/' (tag 'worms') and apply the
    WoRMS-specific renames, synonyms and flags below.

    Returns the resulting taxonomy.
    """
    worms = Taxonomy.getTaxonomy('tax/worms/', 'worms')
    worms.smush()

    worms.taxon('Biota').rename('life')
    worms.taxon('Animalia').synonym('Metazoa')

    fix_basal(worms)

    # 2015-02-17 According to WoRMS web site.  Occurs in pg_1229
    if worms.maybeTaxon('Scenedesmus communis') is not None:
        worms.taxon('Scenedesmus communis').synonym('Scenedesmus caudata')

    # See NCBI
    worms.taxon('Millericrinida').extant()

    # Help to match up with IRMNG
    worms.taxon('Ochrophyta').synonym('Heterokontophyta')

    # Second smush, after the edits above.
    worms.smush()  # Gracilimesus gorbunovi, pg_1783

    return worms
def load_worms():
    """Load the taxonomy in 'tax/worms/' (tag 'worms') and apply the
    WoRMS-specific renames, synonyms and flags below.

    Returns the resulting taxonomy.
    """
    worms = Taxonomy.getTaxonomy('tax/worms/', 'worms')
    worms.smush()

    worms.taxon('Biota').rename('life')
    worms.taxon('Animalia').synonym('Metazoa')

    fix_basal(worms)

    # 2015-02-17 According to WoRMS web site.  Occurs in pg_1229
    if worms.maybeTaxon('Scenedesmus communis') is not None:
        worms.taxon('Scenedesmus communis').synonym('Scenedesmus caudata')

    # See NCBI
    worms.taxon('Millericrinida').extant()

    # Help to match up with IRMNG
    worms.taxon('Ochrophyta').synonym('Heterokontophyta')

    # Second smush, after the edits above.
    worms.smush()  # Gracilimesus gorbunovi, pg_1783

    return worms
def loadNcbi():
    """Load the taxonomy in 'tax/ncbi/' (tag 'ncbi'), prune unwanted
    top-level taxa when present, canonicalize some division names, and
    run the OTU and container analyses.

    Returns the resulting taxonomy.
    """
    ncbi = Taxonomy.getTaxonomy('tax/ncbi/', 'ncbi')
    ncbi.taxon('Viridiplantae').rename('Chloroplastida')

    # New NCBI top level taxa introduced circa July 2014
    for toplevel in ["Viroids", "other sequences", "unclassified sequences"]:
        if ncbi.maybeTaxon(toplevel) is not None:
            ncbi.taxon(toplevel).prune()

    # - Canonicalize division names (cf. skeleton) -

    # JAR 2014-05-13 scrutinizing pin() and BarrierNodes.  Wikipedia
    # confirms these synonymies.
    ncbi.taxon('Glaucocystophyceae').rename('Glaucophyta')
    ncbi.taxon('Haptophyceae').rename('Haptophyta')

    # analyzeOTUs sets flags on questionable taxa ("unclassified",
    # hybrids, and so on) to allow the option of suppression downstream
    ncbi.analyzeOTUs()
    ncbi.analyzeContainers()

    return ncbi
def load_irmng():
    """Load the taxonomy in 'tax/irmng/' (tag 'irmng') and apply the
    IRMNG-specific patches below, in order.

    The patches are individually dated/annotated; their relative order is
    preserved because later lookups may depend on earlier edits.

    Returns the resulting taxonomy.
    """
    irmng = Taxonomy.getTaxonomy('tax/irmng/', 'irmng')
    irmng.smush()
    irmng.analyzeMajorRankConflicts()

    fix_basal(irmng)

    irmng.taxon('Animalia').synonym('Metazoa')

    # JAR 2014-04-26 Flush all 'Unaccepted' taxa
    irmng.taxon('Unaccepted', 'life').prune(this_source)

    # Fixes

    # Neopithecus (extinct) occurs in two places.  Flush one, mark the other
    irmng.taxon('1413316').prune(this_source)  # Neopithecus in Mammalia
    irmng.taxon('1413315').extinct()  # Neopithecus in Primates (Pongidae)

    # RR #50
    # irmng.taxon('Saxo-Fridericia').rename('Saxofridericia')
    # irmng.taxon('Saxofridericia').absorb(irmng.taxon('Saxo-fridericia'))
    saxo = irmng.maybeTaxon('1063899')
    if saxo is not None:
        saxo.absorb(irmng.taxon('1071613'))

    # Romina 2014-04-09
    # IRMNG has EIGHT different Trichodermas.  (Four are synonyms of other things.)
    # 1307461 = Trichoderma Persoon 1794, in Hypocreaceae
    # https://github.com/OpenTreeOfLife/reference-taxonomy/issues/86
    irmng.taxon('Hypocrea').absorb(irmng.taxon('1307461'))

    # JAR 2015-06-28
    # The synonym Ochrothallus multipetalus -> Niemeyera multipetala
    # is no good; it interferes with correct processing of Ochrothallus
    # multipetalus.  We could remove the synonym, but instead remove its
    # target because no synonym-removal command is available.
    irmng.taxon('Niemeyera multipetala').prune(this_source)

    tip = irmng.taxon('Tipuloidea', 'Hemiptera')  # irmng:1170022
    if tip is not None:
        tip.prune("about:blank#this-homonym-is-causing-too-much-trouble")

    oph = irmng.taxon('Ophiurina', 'Ophiurinidae')  # irmng:1346026
    if oph is not None:
        oph.prune("about:blank#this-homonym-is-causing-too-much-trouble")

    # NCBI synonymizes Pelecypoda = Bivalvia
    irmng.taxon('Bivalvia').absorb(irmng.taxon('Pelecypoda'))  # bogus order
    # hmm
    irmng.taxon('Bivalvia').extant()

    # This one was mapping to Blattodea, and making it extinct.
    # Caused me a couple of hours of grief.
    # My guess is it's because its unique child Sinogramma is in Blattodea in GBIF.
    # Wikipedia says it's paraphyletic.
    irmng.taxon('Blattoptera', 'Insecta').prune('https://en.wikipedia.org/wiki/Blattoptera')

    # 2015-07-25 Found while trying to figure out why Theraphosidae was marked extinct.
    # NCBI thinks that Theraphosidae and Aviculariidae are the same.
    irmng.taxon('Aviculariidae').extant()

    # 2015-07-25 Extra Dipteras are confusing new division logic.  Barren genus
    irmng.taxon('1323521').prune(this_source)

    # 2015-09-10 This one is unclassified (Diptera) and is leading to confusion with two other Steinias.
    irmng.taxon('1299622').prune(this_source)

    # 2015-09-11 https://github.com/OpenTreeOfLife/feedback/issues/74
    # Lymnea is a snail, not a shark
    irmng.taxon('1317416').prune(this_source)

    # 2015-10-12 JAR checked IRMNG online and this taxon (Ctenophora in Chelicerata) did not exist
    if irmng.maybeTaxon('1279363') is not None:
        irmng.taxon('1279363').prune(this_source)

    return irmng
def load_713():
    """Load and return the taxonomy stored in 'tax/713/' (tag 'study713')."""
    return Taxonomy.getTaxonomy('tax/713/', 'study713')
# Jython script to add names as 3rd column to otu OTT ids file from org.opentreeoflife.smasher import Taxonomy import csv ottdirname = 'tax/prev_ott/' infilename = 'ids-that-are-otus.tsv' outfilename = 'ids-that-are-otus.tsv.new' # Load OTT ids = Taxonomy.getTaxonomy(ottdirname) # Load ids file and write ids file def doit(): win = 0 lose = 0 infile = open(infilename, 'r') outfile = open(outfilename, 'w') for line in infile: row = line.strip().split('\t') id = row[0] studies = row[1] taxon = ids.lookupId(id) name = "" if taxon != None: name = taxon.name win += 1 else:
small_tax = ott.maybeTaxon(small_id) if small_tax == None: small_tax = ott.maybeTaxon(small) if small_tax == None: print '** No unique taxon with id %s or name %s' % (small_id, small) else: print '** %s is %s, not %s' % (small, small_tax.id, small_id) else: look = ott.maybeTaxon(small, big) if look == None: print '** %s=%s not under %s' % (small, small_id, big) small_tax.show() elif look != small_tax: print '** The %s that descends from %s is %s, not %s' % ( small, big, look.id, small_id) if small_tax.isHidden(): print '%s (%s) is hidden' % (small, small_id) infile.close() if __name__ == '__main__': taxname = 'tax/ott/' if len(sys.argv) > 1: taxname = sys.argv[1] else: print sys.argv check(Taxonomy.getTaxonomy(taxname))
# Smasher script, for demonstration purposes. # This was written as a one-off and isn't meant to show off good # coding style. The treatment of ambiguous names is definitely a # kludge. It would be better to use the taxonThatContains method to # refer to a taxon without using an id. from org.opentreeoflife.smasher import Taxonomy ott = Taxonomy.getTaxonomy('tax/2.8/') ncbi = Taxonomy.getTaxonomy('tax/ncbi/') def do_counts(tax, bac, cil): for x in [ "Bacteria", "Cyanobacteria", "Ciliophora", "Nematoda", "Chlorophyta", "Rhodophyceae", "Fungi", "Insecta", "Chordata", "Embryophyta" ]: key = x if x == "Bacteria": key = bac if x == "Ciliophora": key = cil print x, tax.taxon(key).tipCount() print "OTT 2.8" do_counts(ott, "844192", "302424") print "NCBI 11 June 2014" do_counts(ncbi, "2", "5878") """
def load_irmng():
    """Load the taxonomy in 'tax/irmng/' (tag 'irmng') and apply the
    IRMNG-specific patches below, in order.

    The patches are individually dated/annotated; their relative order is
    preserved because later lookups may depend on earlier edits.

    Returns the resulting taxonomy.
    """
    irmng = Taxonomy.getTaxonomy('tax/irmng/', 'irmng')
    irmng.smush()
    irmng.analyzeMajorRankConflicts()

    fix_basal(irmng)

    irmng.taxon('Animalia').synonym('Metazoa')

    # JAR 2014-04-26 Flush all 'Unaccepted' taxa
    irmng.taxon('Unaccepted', 'life').prune(this_source)

    # Fixes

    # Neopithecus (extinct) occurs in two places.  Flush one, mark the other
    irmng.taxon('1413316').prune(this_source)  # Neopithecus in Mammalia
    irmng.taxon('1413315').extinct()  # Neopithecus in Primates (Pongidae)

    # RR #50
    # irmng.taxon('Saxo-Fridericia').rename('Saxofridericia')
    # irmng.taxon('Saxofridericia').absorb(irmng.taxon('Saxo-fridericia'))
    saxo = irmng.maybeTaxon('1063899')
    if saxo is not None:
        saxo.absorb(irmng.taxon('1071613'))

    # Romina 2014-04-09
    # IRMNG has EIGHT different Trichodermas.  (Four are synonyms of other things.)
    # 1307461 = Trichoderma Persoon 1794, in Hypocreaceae
    # https://github.com/OpenTreeOfLife/reference-taxonomy/issues/86
    irmng.taxon('Hypocrea').absorb(irmng.taxon('1307461'))

    # JAR 2015-06-28
    # The synonym Ochrothallus multipetalus -> Niemeyera multipetala
    # is no good; it interferes with correct processing of Ochrothallus
    # multipetalus.  We could remove the synonym, but instead remove its
    # target because no synonym-removal command is available.
    irmng.taxon('Niemeyera multipetala').prune(this_source)

    tip = irmng.taxon('Tipuloidea', 'Hemiptera')  # irmng:1170022
    if tip is not None:
        tip.prune("about:blank#this-homonym-is-causing-too-much-trouble")

    oph = irmng.taxon('Ophiurina', 'Ophiurinidae')  # irmng:1346026
    if oph is not None:
        oph.prune("about:blank#this-homonym-is-causing-too-much-trouble")

    # NCBI synonymizes Pelecypoda = Bivalvia
    irmng.taxon('Bivalvia').absorb(irmng.taxon('Pelecypoda'))  # bogus order
    # hmm
    irmng.taxon('Bivalvia').extant()

    # This one was mapping to Blattodea, and making it extinct.
    # Caused me a couple of hours of grief.
    # My guess is it's because its unique child Sinogramma is in Blattodea in GBIF.
    # Wikipedia says it's paraphyletic.
    irmng.taxon('Blattoptera', 'Insecta').prune('https://en.wikipedia.org/wiki/Blattoptera')

    # 2015-07-25 Found while trying to figure out why Theraphosidae was marked extinct.
    # NCBI thinks that Theraphosidae and Aviculariidae are the same.
    irmng.taxon('Aviculariidae').extant()

    # 2015-07-25 Extra Dipteras are confusing new division logic.  Barren genus
    irmng.taxon('1323521').prune(this_source)

    # 2015-09-10 This one is unclassified (Diptera) and is leading to confusion with two other Steinias.
    irmng.taxon('1299622').prune(this_source)

    # 2015-09-11 https://github.com/OpenTreeOfLife/feedback/issues/74
    # Lymnea is a snail, not a shark
    irmng.taxon('1317416').prune(this_source)

    # 2015-10-12 JAR checked IRMNG online and this taxon (Ctenophora in Chelicerata) did not exist
    if irmng.maybeTaxon('1279363') is not None:
        irmng.taxon('1279363').prune(this_source)

    return irmng
# Jython script to build the Open Tree reference taxonomy from org.opentreeoflife.smasher import Taxonomy import sys sys.path.append("feed/ott/") from chromista_spreadsheet import fixChromista ott = Taxonomy.newTaxonomy() h2007 = Taxonomy.getNewick('feed/h2007/tree.tre', 'h2007') ott.absorb(h2007) silva = Taxonomy.getTaxonomy('tax/silva/', 'silva') ott.absorb(silva) study713 = Taxonomy.getTaxonomy('tax/713/', 'study713') ott.notSame(study713.taxon('Buchnera'), silva.taxon('Buchnera')) ott.absorb(study713) fung = Taxonomy.getTaxonomy('tax/if/', 'if') fung.smush() fung.analyzeMajorRankConflicts() ott.absorb(fung) ncbi = Taxonomy.getTaxonomy('tax/ncbi/', 'ncbi') ncbi.taxon('Fungi').hideDescendants() ott.same(ncbi.taxon('Cyanobacteria'), silva.taxon('D88288/#3')) ott.notSame(ncbi.taxon('Burkea'), fung.taxon('Burkea')) ott.notSame(ncbi.taxon('Coscinium'), fung.taxon('Coscinium')) ott.notSame(ncbi.taxon('Perezia'), fung.taxon('Perezia')) # This one should be temporary, might change with SILVA 117.
# Jython script to build the "model village" taxonomy. from org.opentreeoflife.smasher import Taxonomy # Create model taxonomy tax = Taxonomy.newTaxonomy() # Establish homonym-resolution skeleton (not really used here) skel = Taxonomy.getTaxonomy('tax/skel/', 'skel') tax.setSkeleton(skel) # Add NCBI subset to the model taxonomy ncbi = Taxonomy.getTaxonomy('t/tax/ncbi_aster/') # analyzeOTUs sets flags on questionable taxa ("unclassified" and so on) # to allow the option of suppression downstream ncbi.analyzeOTUs() tax.absorb(ncbi) # Add GBIF subset fo the model taxonomy gbif = Taxonomy.getTaxonomy('t/tax/gbif_aster/') # analyzeMajorRankConflicts sets the "major_rank_conflict" flag when # intermediate ranks are missing (e.g. a family that's a child of a # class) gbif.analyzeMajorRankConflicts() tax.absorb(gbif) # "Old" patch system with tab-delimited files tax.edit('t/edits/') # Example of referring to a taxon
# Jython script to build the model village taxonomy. from org.opentreeoflife.smasher import Taxonomy ncbi = Taxonomy.getTaxonomy('t/tax/ncbi_aster/') ncbi.analyzeOTUs() gbif = Taxonomy.getTaxonomy('t/tax/gbif_aster/') gbif.analyzeMajorRankConflicts() tax = Taxonomy.unite([ncbi, gbif]) tax.edit('t/edits/') fam = tax.taxon("Phellinaceae") gen = tax.newTaxon("Opentreeia", "genus", "data:testing") fam.add(gen) gen.add(tax.newTaxon("Opentreeia sp. A", "species", "data:testing")) gen.add(tax.newTaxon("Opentreeia sp. B", "species", "data:testing")) sp = tax.newTaxon("Opentreeia sp. C", "species", "data:testing") gen.add(sp) sp.prune() tax.assignIds(Taxonomy.getTaxonomy('t/tax/prev_aster/')) tax.dump("t/tax/aster/")
from org.opentreeoflife.smasher import Taxonomy sourcenames = ['if.4', 'if.2', 'if.1'] sources = [ Taxonomy.getTaxonomy('feed/fung/' + name + '/', name) for name in sourcenames ] fung = Taxonomy.getTaxonomy('feed/fung/if.7/', 'if') def fix_root(): root = establish('Fungi', '90156', 'kingdom') establish('Ascomycota', '90031', 'phylum') return root def establish(name, id, rank): taxon = fung.maybeTaxon(name) if taxon == None: taxon = fung.newTaxon(name, rank, None) taxon.setId(id) else: if taxon.id != id: print '** unexpected id %s for %s' % (taxon.id, name) return taxon def fix_parents(root): changes = {}
def load_713():
    """Load and return the taxonomy stored in 'tax/713/' (tag 'study713')."""
    return Taxonomy.getTaxonomy('tax/713/', 'study713')
def create_ott():
    """Build the OTT taxonomy by absorbing the source taxonomies in a
    fixed order, patching the result, and assigning OTT ids.

    The absorb order below is significant and is preserved exactly.

    Returns the assembled taxonomy.
    """
    ott = Taxonomy.newTaxonomy()

    # There ought to be tests for all of these...
    for name in names_of_interest:
        ott.namesOfInterest.add(name)

    # When lumping, prefer to use ids that have been used in OTU matching
    # This list could be used for all sorts of purposes...
    ott.loadPreferredIds('ids-that-are-otus.tsv', False)
    ott.loadPreferredIds('ids-in-synthesis.tsv', True)

    ott.setSkeleton(Taxonomy.getTaxonomy('tax/skel/', 'skel'))

    silva = prepare_silva(ott)
    ott.absorb(silva)
    check_invariants(ott)

    h2007 = prepare_h2007(ott)
    ott.absorb(h2007)

    (fungi, fungorum_sans_fungi) = prepare_fungorum(ott)
    ott.absorb(fungi)
    check_invariants(ott)
    # the non-Fungi from Index Fungorum get absorbed below

    lamiales = prepare_lamiales(ott)
    ott.absorb(lamiales)

    (malacostraca, worms_sans_malacostraca) = prepare_worms(ott)
    ott.absorb(malacostraca)

    ncbi = prepare_ncbi(ott)
    align_ncbi_to_silva(ncbi, silva, ott)
    ott.absorb(ncbi)
    check_invariants(ott)

    ott.absorb(worms_sans_malacostraca)

    ott.absorb(fungorum_sans_fungi)

    gbif = prepare_gbif(ott)
    ott.absorb(gbif)

    irmng = prepare_irmng(ott)
    ott.absorb(irmng)

    taxonomies.link_to_h2007(ott)

    get_default_extinct_info_from_gbif(gbif, ott)

    check_invariants(ott)
    # consider try: ... except: print '**** Exception in patch_ott'
    patch_ott(ott)

    # Experimental...
    unextinct_ncbi(ncbi, ott)

    # Remove all trees but the largest (or make them life incertae sedis)
    ott.deforestate()

    # -----------------------------------------------------------------------------
    # OTT id assignment

    # Force some id assignments... will try to automate this in the future.
    # Most of these come from looking at the otu-deprecated.tsv file after a
    # series of smasher runs.
    forced_assignments = [
        ('Tipuloidea', 'Diptera', '722875'),
        ('Saccharomycetes', 'Saccharomycotina', '989999'),
        ('Phaeosphaeria', 'Ascomycota', '5486272'),
        ('Synedra acus', 'Eukaryota', '992764'),
        ('Epiphloea', 'Halymeniaceae', '5342325'),
        ('Hessea', 'Archaeplastida', '600099'),
        ('Morganella', 'Arthropoda', '6400'),
        ('Rhynchonelloidea', 'Rhynchonellidae', '5316010'),
        ('Epiphloea', 'Lichinales', '5342482'),
        ('Morganella', 'Fungi', '973932'),
        ('Parmeliaceae', 'Lecanorales', '305904'),
    ]
    # `forced_id` rather than `id`, to avoid shadowing the builtin.
    for (inf, sup, forced_id) in forced_assignments:
        tax = ott.taxon(inf, sup)
        if tax is not None:
            tax.setId(forced_id)

    ott.taxonThatContains('Rhynchonelloidea', 'Sphenarina').setId('795939')

    # NCBI
    for (ncbi_id, ott_id, name) in ncbi_assignments_list:
        n = ncbi.maybeTaxon(ncbi_id)
        if n is not None:
            im = ott.image(n)
            if im is not None:
                im.setId(ott_id)
            else:
                print('** NCBI %s not mapped - %s' % (ncbi_id, name))
        else:
            print('** No NCBI taxon %s - %s' % (ncbi_id, name))

    # Cylindrocarpon is now Neonectria
    ott.image(gbif.taxon('2563163')).setId('51754')

    # Foo
    trich = fungi.maybeTaxon('Trichosporon')
    if trich is not None:
        ott.image(trich).setId('364222')

    #ott.image(fungi.taxon('11060')).setId('4107132') #Cryptococcus - a total mess

    # Assign OTT ids to taxa that don't have them, re-using old ids when possible
    ids = Taxonomy.getTaxonomy('tax/prev_ott/')

    # Assign old ids to nodes in the new version
    ott.assignIds(ids)

    report_on_h2007(h2007, ott)

    return ott
# One-off script prepared to provide data to David Hibbett and Romina Gazis. # Lists numbers of species in each fungal order. from org.opentreeoflife.smasher import Taxonomy import csv, sys from taxonomies import load_fung, load_ncbi, load_gbif, load_irmng taxonomies = [ ("fung", load_fung(), "Index Fungorum"), ("ncbi", load_ncbi(), "NCBI"), ("gbif", load_gbif(), "GBIF"), ("irmng", load_irmng(), "IRMNG"), ("ott", Taxonomy.getTaxonomy("tax/ott/"), "OTT 2.9"), ] def main(): infile = open("order-counts-orders.csv", "r") reader = csv.reader(infile) reader.next() # header row taxa = ["Fungi"] for tuple in reader: taxa.append(tuple[0]) infile.close() write_counts(taxa) def write_counts(taxa): outfile = open("order-counts.csv", "w") writer = csv.writer(outfile)
# Smasher script, for demonstration purposes. # This was written as a one-off and isn't meant to show off good # coding style. The treatment of ambiguous names is definitely a # kludge. It would be better to use the taxonThatContains method to # refer to a taxon without using an id. from org.opentreeoflife.smasher import Taxonomy ott = Taxonomy.getTaxonomy('tax/2.8/') ncbi = Taxonomy.getTaxonomy('tax/ncbi/') def do_counts(tax, bac, cil): for x in ["Bacteria", "Cyanobacteria", "Ciliophora", "Nematoda", "Chlorophyta", "Rhodophyceae", "Fungi", "Insecta", "Chordata", "Embryophyta"]: key = x if x == "Bacteria": key = bac if x == "Ciliophora": key = cil print x, tax.taxon(key).tipCount() print "OTT 2.8" do_counts(ott, "844192", "302424")
# One-off script prepared to provide data to David Hibbett and Romina Gazis. # Lists numbers of species in each fungal order. from org.opentreeoflife.smasher import Taxonomy import csv, sys from taxonomies import load_fung, load_ncbi, load_gbif, load_irmng taxonomies = [ ('fung', load_fung(), 'Index Fungorum'), ('ncbi', load_ncbi(), 'NCBI'), ('gbif', load_gbif(), 'GBIF'), ('irmng', load_irmng(), 'IRMNG'), ('ott', Taxonomy.getTaxonomy('tax/ott/'), 'OTT 2.9'), ] def main(): infile = open('order-counts-orders.csv', 'r') reader = csv.reader(infile) reader.next() #header row taxa = ['Fungi'] for tuple in reader: taxa.append(tuple[0]) infile.close() write_counts(taxa) def write_counts(taxa): outfile = open('order-counts.csv', 'w') writer = csv.writer(outfile)
# Jython script to build the Open Tree reference taxonomy # coding=utf-8 # Unless specified otherwise issues are in the reference-taxonomy repo: # https://github.com/OpenTreeOfLife/reference-taxonomy/issues/... import sys from org.opentreeoflife.smasher import Taxonomy import taxonomies sys.path.append("feed/misc/") from chromista_spreadsheet import fixChromista ott = Taxonomy.newTaxonomy() skel = Taxonomy.getTaxonomy('tax/skel/', 'skel') ott.setSkeleton(skel) # ----- SILVA microbial taxonomy ----- def doSilva(): silva = taxonomies.loadSilva() # - Deal with parent/child homonyms in SILVA - # Arbitrary choices here to eliminate ambiguities down the road when NCBI gets merged. # (If the homonym is retained, then the merge algorithm will have no # way to choose between them, and refuse to match either. It will # then create a third homonym.) # Note order dependence between the following two silva.taxon('Intramacronucleata','Intramacronucleata').rename('Intramacronucleata inf.') silva.taxon('Spirotrichea','Intramacronucleata inf.').rename('Spirotrichea inf.') silva.taxon('Cyanobacteria','Bacteria').rename('Cyanobacteria sup.')
from org.opentreeoflife.smasher import Taxonomy
from org.opentreeoflife.smasher import Reportx
import taxonomies

# Fresh taxonomy carrying the skeleton from tax/skel/; used by report()
# to mark divisions on each taxonomy being reported.
ott = Taxonomy.newTaxonomy()
skel = Taxonomy.getTaxonomy('tax/skel/', 'skel')
ott.setSkeleton(skel)

def report(tax, tag):
    """Mark and check divisions on `tax`, then write the report file
    named `tag` + '-mrca-report.tsv'."""
    ott.markDivisions(tax)
    # Reportx.bogotypes(tax)
    taxonomies.checkDivisions(tax)
    Reportx.report(tax, tag + '-mrca-report.tsv')

# Hard-wired switch: as written only the IRMNG report runs; the else
# branch (reports for every source) is currently disabled.
if True:
    report(taxonomies.loadIrmng(), 'irmng')
else:
    silva = taxonomies.loadSilva()
    ott.notSame(silva.taxon('Ctenophora', 'Coscinodiscophytina'), skel.taxon('Ctenophora'))
    report(silva, 'silva')
    report(taxonomies.loadH2007(), 'h2007')
    report(taxonomies.loadFung(), 'if')
    report(taxonomies.loadNcbi(), 'ncbi')
    report(taxonomies.loadGbif(), 'gbif')
    report(taxonomies.loadIrmng(), 'irmng')
    report(taxonomies.loadOtt(), 'ott')
# Jython script to build the Open Tree reference taxonomy # coding=utf-8 # Unless specified otherwise issues are in the reference-taxonomy repo: # https://github.com/OpenTreeOfLife/reference-taxonomy/issues/... import sys from org.opentreeoflife.smasher import Taxonomy import taxonomies sys.path.append("feed/misc/") from chromista_spreadsheet import fixChromista ott = Taxonomy.newTaxonomy() skel = Taxonomy.getTaxonomy('tax/skel/', 'skel') ott.setSkeleton(skel) # ----- SILVA microbial taxonomy ----- def doSilva(): silva = taxonomies.loadSilva() # - Deal with parent/child homonyms in SILVA - # Arbitrary choices here to eliminate ambiguities down the road when NCBI gets merged. # (If the homonym is retained, then the merge algorithm will have no # way to choose between them, and refuse to match either. It will # then create a third homonym.) # Note order dependence between the following two silva.taxon('Intramacronucleata','Intramacronucleata').rename('Intramacronucleata inf.') silva.taxon('Spirotrichea','Intramacronucleata inf.').rename('Spirotrichea inf.') silva.taxon('Cyanobacteria','Bacteria').rename('Cyanobacteria sup.')
# Version 4 of the Index Fungorum conversion is missing many parent # pointers. This script recovers them from versions 1, 2, and 3 # and writes out a revised IF taxonomy. # Run this with smash --jython feed/if/patch-if.py where # alias smash='java -classpath ".:lib/*" -Xmx10g org.opentreeoflife.smasher.Smasher' from org.opentreeoflife.smasher import Taxonomy fung = Taxonomy.getTaxonomy('tax/if.4/', 'if') fungi = fung.newTaxon('Fungi', 'kingdom', 'if:90156') fungi.id = '90156' #kludge ascomycota = fung.newTaxon('Ascomycota', 'phylum', 'if:90031') ascomycota.id = '90031' changes = {} losers = {} def fixit(ofung): for taxon in fung: danger = False if not (taxon in changes) and taxon.getParent() == None: otaxon = ofung.maybeTaxon(taxon.id) if otaxon == None: otaxon = ofung.maybeTaxon(taxon.name) if otaxon != None and abs(int(otaxon.id) - int(taxon.id)) > 2: danger = True if otaxon != None and otaxon.parent != None:
# Jython script to build the Open Tree reference taxonomy from org.opentreeoflife.smasher import Taxonomy import sys sys.path.append("feed/misc/") from chromista_spreadsheet import fixChromista ott = Taxonomy.newTaxonomy() # ----- SILVA microbial taxonomy ----- silva = Taxonomy.getTaxonomy('tax/silva/', 'silva') # Deal with parent/child homonyms in SILVA. # Arbitrary choices here to eliminate ambiguities down the road when NCBI gets merged. # (If the homonym is retained, then the merge algorithm will have no # way to choose between them, and refuse to match either. It will # then create a third homonym.) # Note order dependence between the following two silva.taxon('Intramacronucleata', 'Intramacronucleata').rename('Intramacronucleata inf.') silva.taxon('Spirotrichea', 'Intramacronucleata inf.').rename('Spirotrichea inf.') silva.taxon('Cyanobacteria', 'Bacteria').rename('Cyanobacteria sup.') silva.taxon('Actinobacteria', 'Bacteria').rename('Actinobacteria sup.') silva.taxon('Acidobacteria', 'Bacteria').rename('Acidobacteria sup.') silva.taxon('Ochromonas', 'Ochromonadales').rename('Ochromonas sup.') silva.taxon('Tetrasphaera', 'Tetrasphaera').rename('Tetrasphaera inf.') # SILVA's placement of Rozella as a sibling of Fungi is contradicted # by Hibbett 2007, which puts it under Fungi. Hibbett gets priority.
# Jython script to add names as 3rd column to otu OTT ids file from org.opentreeoflife.smasher import Taxonomy import csv ottdirname = 'tax/prev_ott/' infilename = 'ids-that-are-otus.tsv' outfilename = 'ids-that-are-otus.tsv.new' # Load OTT ids = Taxonomy.getTaxonomy(ottdirname) # Load ids file and write ids file def doit(): win = 0 lose = 0 infile = open(infilename, 'r') outfile = open(outfilename, 'w') for line in infile: row = line.strip().split('\t') id = row[0] studies = row[1] taxon = ids.lookupId(id) name = "" if taxon != None: name = taxon.name win += 1 else: lose += 1
from org.opentreeoflife.smasher import Taxonomy
from org.opentreeoflife.smasher import Reportx
import taxonomies

# Empty target taxonomy carrying the skeleton; report() marks divisions
# on each source taxonomy through it.
ott = Taxonomy.newTaxonomy()
skel = Taxonomy.getTaxonomy('tax/skel/', 'skel')
ott.setSkeleton(skel)

def report(tax, tag):
    """Mark and check divisions on `tax`, then write its report.

    tax -- a loaded source taxonomy
    tag -- short source name; the report is written to
           tag + '-mrca-report.tsv'
    """
    ott.markDivisions(tax)
    # Reportx.bogotypes(tax)
    taxonomies.checkDivisions(tax)
    report_path = tag + '-mrca-report.tsv'
    Reportx.report(tax, report_path)

# Hand-flipped switch: the first branch reports on IRMNG only, the
# second walks every source taxonomy.
if True:
    report(taxonomies.loadIrmng(), 'irmng')
else:
    silva = taxonomies.loadSilva()
    silva_ctenophora = silva.taxon('Ctenophora', 'Coscinodiscophytina')
    ott.notSame(silva_ctenophora, skel.taxon('Ctenophora'))
    report(silva, 'silva')

    report(taxonomies.loadH2007(), 'h2007')
    report(taxonomies.loadFung(), 'if')
    report(taxonomies.loadNcbi(), 'ncbi')
    report(taxonomies.loadGbif(), 'gbif')
    report(taxonomies.loadIrmng(), 'irmng')
    report(taxonomies.loadOtt(), 'ott')
def create_ott():
    """Build the OTT taxonomy from its source taxonomies and return it.

    Steps, in order:
      1. Create an empty taxonomy, register names_of_interest, load the
         preferred-id lists and install the skeleton.
      2. Absorb the prepared sources: SILVA, H2007, the Fungi part of
         Index Fungorum, Lamiales, the Malacostraca part of WoRMS, NCBI,
         the rest of WoRMS, the rest of Index Fungorum, GBIF, IRMNG.
         check_invariants() is run after several of these.
      3. Link to H2007, pull default extinct info from GBIF, apply
         patch_ott and unextinct_ncbi, then deforestate.
      4. Force a number of OTT ids, then assign the remaining ids from
         the previous OTT version ('tax/prev_ott/').

    Every forced-id assignment is best effort: a taxon that cannot be
    found or mapped is skipped (with a '**' warning for the NCBI, GBIF,
    Index Fungorum and Rhynchonelloidea cases) instead of aborting the
    build at its final stage.
    """
    ott = Taxonomy.newTaxonomy()

    # There ought to be tests for all of these...
    for name in names_of_interest:
        ott.namesOfInterest.add(name)

    # When lumping, prefer to use ids that have been used in OTU matching
    # This list could be used for all sorts of purposes...
    ott.loadPreferredIds('ids-that-are-otus.tsv', False)
    ott.loadPreferredIds('ids-in-synthesis.tsv', True)

    ott.setSkeleton(Taxonomy.getTaxonomy('tax/skel/', 'skel'))

    silva = prepare_silva(ott)
    ott.absorb(silva)
    check_invariants(ott)

    h2007 = prepare_h2007(ott)
    ott.absorb(h2007)

    (fungi, fungorum_sans_fungi) = prepare_fungorum(ott)
    ott.absorb(fungi)
    check_invariants(ott)
    # the non-Fungi from Index Fungorum get absorbed below

    lamiales = prepare_lamiales(ott)
    ott.absorb(lamiales)

    (malacostraca, worms_sans_malacostraca) = prepare_worms(ott)
    ott.absorb(malacostraca)

    ncbi = prepare_ncbi(ott)
    align_ncbi_to_silva(ncbi, silva, ott)
    ott.absorb(ncbi)
    check_invariants(ott)

    ott.absorb(worms_sans_malacostraca)
    ott.absorb(fungorum_sans_fungi)

    gbif = prepare_gbif(ott)
    ott.absorb(gbif)

    irmng = prepare_irmng(ott)
    ott.absorb(irmng)

    taxonomies.link_to_h2007(ott)

    get_default_extinct_info_from_gbif(gbif, ott)

    check_invariants(ott)
    # consider try: ... except: print '**** Exception in patch_ott'
    patch_ott(ott)

    # Experimental...
    unextinct_ncbi(ncbi, ott)

    # Remove all trees but the largest (or make them life incertae sedis)
    ott.deforestate()

    # -------------------------------------------------------------------------
    # OTT id assignment

    # Force some id assignments... will try to automate this in the future.
    # Most of these come from looking at the otu-deprecated.tsv file after a
    # series of smasher runs.
    # Each entry is (taxon name, containing taxon name, forced OTT id).
    forced_ids = [
        ('Tipuloidea', 'Diptera', '722875'),
        ('Saccharomycetes', 'Saccharomycotina', '989999'),
        ('Phaeosphaeria', 'Ascomycota', '5486272'),
        ('Synedra acus','Eukaryota','992764'),
        ('Epiphloea','Halymeniaceae','5342325'),
        ('Hessea','Archaeplastida','600099'),
        ('Morganella','Arthropoda','6400'),
        ('Rhynchonelloidea','Rhynchonellidae','5316010'),
        ('Epiphloea', 'Lichinales', '5342482'),
        ('Morganella', 'Fungi', '973932'),
        ('Parmeliaceae', 'Lecanorales', '305904'),
    ]
    # `forced_id` rather than `id`, so the builtin is not shadowed.
    for (inf, sup, forced_id) in forced_ids:
        tax = ott.taxon(inf, sup)
        if tax is not None:
            tax.setId(forced_id)

    # The one-off assignments below are guarded like the loops around
    # them, so a single missing taxon cannot kill the whole build.
    rhynchonelloidea = ott.taxonThatContains('Rhynchonelloidea', 'Sphenarina')
    if rhynchonelloidea is not None:
        rhynchonelloidea.setId('795939')
    else:
        print('** No taxon Rhynchonelloidea containing Sphenarina')

    # NCBI
    for (ncbi_id, ott_id, name) in ncbi_assignments_list:
        n = ncbi.maybeTaxon(ncbi_id)
        if n is not None:
            im = ott.image(n)
            if im is not None:
                im.setId(ott_id)
            else:
                print('** NCBI %s not mapped - %s' % (ncbi_id, name))
        else:
            print('** No NCBI taxon %s - %s' % (ncbi_id, name))

    # Cylindrocarpon is now Neonectria
    cylindrocarpon = gbif.taxon('2563163')
    cylindrocarpon_image = None
    if cylindrocarpon is not None:
        cylindrocarpon_image = ott.image(cylindrocarpon)
    if cylindrocarpon_image is not None:
        cylindrocarpon_image.setId('51754')
    else:
        print('** GBIF %s not mapped - %s' % ('2563163', 'Cylindrocarpon'))

    # Trichosporon, looked up in the Fungi part of Index Fungorum
    trich = fungi.maybeTaxon('Trichosporon')
    if trich is not None:
        trich_image = ott.image(trich)
        if trich_image is not None:
            trich_image.setId('364222')
        else:
            print('** IF %s not mapped' % 'Trichosporon')

    #ott.image(fungi.taxon('11060')).setId('4107132') #Cryptococcus - a total mess

    # Assign OTT ids to taxa that don't have them, re-using old ids when possible
    ids = Taxonomy.getTaxonomy('tax/prev_ott/')

    # Assign old ids to nodes in the new version
    ott.assignIds(ids)

    report_on_h2007(h2007, ott)

    return ott
from org.opentreeoflife.smasher import Taxonomy sourcenames = ['if.4', 'if.2', 'if.1'] sources = [Taxonomy.getTaxonomy('feed/fung/' + name + '/', name) for name in sourcenames] fung = Taxonomy.getTaxonomy('feed/fung/if.7/', 'if') def fix_root(): root = establish('Fungi', '90156', 'kingdom') establish('Ascomycota', '90031', 'phylum') return root def establish(name, id, rank): taxon = fung.maybeTaxon(name) if taxon == None: taxon = fung.newTaxon(name, rank, None) taxon.setId(id) else: if taxon.id != id: print '** unexpected id %s for %s' % (taxon.id, name) return taxon def fix_parents(root): changes = {} for taxon in fung: if taxon.getParent() == None: # See if the taxon in a previous version recovered = None for source in sources:
# Version 4 of the Index Fungorum conversion is missing many parent # pointers. This script recovers them from versions 1, 2, and 3 # and writes out a revised IF taxonomy. # Run this with smash --jython feed/if/patch-if.py where # alias smash='java -classpath ".:lib/*" -Xmx10g org.opentreeoflife.smasher.Smasher' from org.opentreeoflife.smasher import Taxonomy fung = Taxonomy.getTaxonomy('tax/if.4/', 'if') fungi = fung.newTaxon('Fungi', 'kingdom', 'if:90156') fungi.id = '90156' #kludge ascomycota = fung.newTaxon('Ascomycota', 'phylum', 'if:90031') ascomycota.id = '90031' changes = {} losers = {} def fixit(ofung): for taxon in fung: danger = False if not (taxon in changes) and taxon.getParent() == None: otaxon = ofung.maybeTaxon(taxon.id) if otaxon == None: otaxon = ofung.maybeTaxon(taxon.name) if otaxon != None and abs(int(otaxon.id) - int(taxon.id)) > 2: danger = True if otaxon != None and otaxon.parent != None:
# Jython script to build the "model village" taxonomy.

from org.opentreeoflife.smasher import Taxonomy

# The model taxonomy starts out empty.
tax = Taxonomy.newTaxonomy()

# Homonym-resolution skeleton (not really used here).
skel = Taxonomy.getTaxonomy('tax/skel/', 'skel')
tax.setSkeleton(skel)

# --- NCBI subset ---
ncbi = Taxonomy.getTaxonomy('t/tax/ncbi_aster/')
# analyzeOTUs sets flags on questionable taxa ("unclassified" and so
# on) to allow the option of suppression downstream.
ncbi.analyzeOTUs()
tax.absorb(ncbi)

# --- GBIF subset ---
gbif = Taxonomy.getTaxonomy('t/tax/gbif_aster/')
# analyzeMajorRankConflicts sets the "major_rank_conflict" flag when
# intermediate ranks are missing (e.g. a family that's a child of a
# class).
gbif.analyzeMajorRankConflicts()
tax.absorb(gbif)

# "Old" patch system: tab-delimited edit files under t/edits/.
tax.edit('t/edits/')

# Example of referring to a taxon by name.
fam = tax.taxon("Phellinaceae")
def loadOtt():
    """Return the taxonomy read from 'tax/ott/' (second argument 'ott')."""
    return Taxonomy.getTaxonomy('tax/ott/', 'ott')