def doit():
    """Merge the Ruggiero and OTT taxonomies into one union and dump it.

    Both sources are loaded first (Ruggiero, then OTT), absorbed into a new
    'ott' union in that same order, and the union is dumped to
    scratch/compare_Ruggiero/ with '\\t' as the second dump argument.
    """
    source_specs = [('scratch/Ruggiero/', 'rug'), ('tax/ott/', 'ott')]
    sources = [Taxonomy.getTaxonomy(path, idspace)
               for (path, idspace) in source_specs]

    merged = UnionTaxonomy.newTaxonomy('ott')
    for source in sources:
        merged.absorb(source)
    merged.dump('scratch/compare_Ruggiero/', '\t')
def conflict(spec1, space1, spec2, space2): # Reference tree ref = Taxonomy.getTaxonomy(spec1, space1) # Input tree input = Taxonomy.getTaxonomy(spec2, space2) a = AlignmentByName(input, ref) a.align(); if False: for node in input.taxa(): print node, a.getTaxon(node) print 'Conflict analysis' ca = ConflictAnalysis(input, ref, a, False) print ' input root:', ca.inputRoot print ' ref root:', ca.refRoot print ' induced root:', ca.inducedRoot print ' ingroup:', ca.ingroup print ' induced ingroup:', ca.inducedIngroup print ' map size:', ca.map.size() print ' comap size:', ca.comap.size() mapped_tip_count = 0 unmapped_tip_count = 0 none_count = 0 rel_counts = {} if ca.inducedRoot != None: for node in ca.ingroup.descendants(True): if node.hasChildren(): art = ca.articulation(node) if art != None: n = art.disposition.name print node, n, art.witness rel_counts[n] = rel_counts.get(n, 0) + 1 else: print node, 'no articulation' none_count += 1 elif a.getTaxon(node) != None: mapped_tip_count += 1 else: unmapped_tip_count += 1 print node, 'unmapped' else: print 'no induced root!' print for n in rel_counts: print '%s: %s' % (n, rel_counts[n]) print 'Mapped tips:', mapped_tip_count print 'Unmapped tips:', unmapped_tip_count print 'Other:', none_count
def load_tree(path): tree = Taxonomy.getTaxonomy(path, 'ott') count = 0 for id in tree.allIds(): count += 1 print count, 'ids' return tree
def load_fung(): fung = Taxonomy.getTaxonomy('tax/fung/', 'if') fung.analyzeMajorRankConflicts() # 2014-04-14 Bad Fungi homonyms in new version of IF. 90156 is the good one. # 90154 has no descendants if fung.maybeTaxon('90154') != None: print 'Removing Fungi 90154' fung.taxon('90154').prune(this_source) # 90155 is "Nom. inval." and has no descendants if fung.maybeTaxon('90155') != None: print 'Removing Fungi 90155' fung.taxon('90155').prune(this_source) fix_basal(fung) # smush folds sibling taxa that have the same name. # fung.smush() if True: patch_fung(fung) else: try: patch_fung(fung) except: print '**** Exception in patch_fung' fung.smush() return fung
def compare(t1, t2): print 'comparing', t1, 'to', t2 retired = 0 became_hidden = 0 became_unhidden = 0 became_extinct = 0 became_unextinct = 0 became_suppressed = 0 became_unsuppressed = 0 kept = 0 novel = 0 tax1 = Taxonomy.getTaxonomy(t1, 'x') tax1.inferFlags() tax2 = Taxonomy.getTaxonomy(t2, 'x') tax2.inferFlags() for taxon in tax1.taxa(): probe = tax2.lookupId(taxon.id) if probe == None: retired += 1 elif probe.isAnnotatedHidden() and not taxon.isAnnotatedHidden(): became_hidden += 1 elif not probe.isAnnotatedHidden() and taxon.isAnnotatedHidden(): became_unhidden += 1 elif probe.isExtinct() and not taxon.isExtinct(): became_extinct += 1 elif not probe.isExtinct() and taxon.isExtinct(): became_unextinct += 1 elif probe.isHidden() and not taxon.isHidden(): became_suppressed += 1 elif not probe.isHidden() and taxon.isHidden(): became_unsuppressed += 1 else: kept += 1 for taxon in tax2.taxa(): if tax1.lookupId(taxon.id) == None: novel += 1 print print 'id retired:', retired print 'newly hidden:', became_hidden print 'no longer hidden:', became_unhidden print 'newly extinct:', became_extinct print 'no longer extinct:', became_unextinct print 'newly otherwise suppressed:', became_suppressed print 'no longer otherwise suppressed:', became_unsuppressed print 'new:', novel print 'no change in status:', kept
def compare(t1, t2): print 'comparing', t1, 'to', t2 retired = 0 became_hidden = 0 became_unhidden = 0 became_extinct = 0 became_unextinct = 0 became_suppressed = 0 became_unsuppressed = 0 kept = 0 novel = 0 tax1 = Taxonomy.getTaxonomy(t1, 'x') tax1.inferFlags() tax2 = Taxonomy.getTaxonomy(t2, 'x') tax2.inferFlags() for taxon in tax1.taxa(): probe = tax2.lookupId(taxon.id) if probe == None: retired += 1 elif probe.isAnnotatedHidden() and not taxon.isAnnotatedHidden(): became_hidden += 1 elif not probe.isAnnotatedHidden() and taxon.isAnnotatedHidden(): became_unhidden += 1 elif probe.isExtinct() and not taxon.isExtinct(): became_extinct += 1 elif not probe.isExtinct() and taxon.isExtinct(): became_unextinct += 1 elif probe.isHidden() and not taxon.isHidden(): became_suppressed += 1 elif not probe.isHidden() and taxon.isHidden(): became_unsuppressed += 1 else: kept += 1 for taxon in tax2.taxa(): if tax1.lookupId(taxon.id) == None: novel += 1 print print 'id retired:', retired print 'newly hidden:', became_hidden print 'no longer hidden:', became_unhidden print 'newly extinct:', became_extinct print 'no longer extinct:', became_unextinct print 'newly otherwise suppressed:', became_suppressed print 'no longer otherwise suppressed:', became_unsuppressed print 'new:', novel print 'no change in status:', kept
def load_silva():
    """Load SILVA from tax/silva/, rename two taxa, patch it, and return it."""
    tax = Taxonomy.getTaxonomy('tax/silva/', 'silva')

    # Used in studies pg_2448,pg_2783,pg_2753, seen deprecated on 2015-07-20
    renames = [
        ('AF364847', 'Pantoea ananatis LMG 20103'),   # ncbi:706191
        ('EF690403', 'Pantoea ananatis B1-9'),        # ncbi:1048262
    ]
    for (accession, new_name) in renames:
        tax.taxon(accession).rename(new_name)

    patch_silva(tax)
    return tax
def load_ncbi():
    """Load NCBI from tax/ncbi/, apply fixes and patches, and return it."""
    tax = Taxonomy.getTaxonomy('tax/ncbi/', 'ncbi')

    fix_SAR(tax)

    green_plants = tax.taxon('Viridiplantae')
    green_plants.rename('Chloroplastida')

    patch_ncbi(tax)

    # analyzeOTUs sets flags on questionable taxa ("unclassified",
    # hybrids, and so on) to allow the option of suppression downstream
    tax.analyzeOTUs()
    tax.analyzeContainers()

    return tax
def load_gbif():
    """Load GBIF from tax/gbif/, normalize and patch it, and return it."""
    tax = Taxonomy.getTaxonomy('tax/gbif/', 'gbif')
    tax.smush()

    # In GBIF, if a rank is skipped for some children but not others, that
    # means the rank-skipped children are incertae sedis.  Mark them so.
    tax.analyzeMajorRankConflicts()

    # creates a Eukaryota node
    fix_basal(tax)

    animals = tax.taxon('Animalia')
    animals.synonym('Metazoa')

    patch_gbif(tax)
    return tax
def load_worms():
    """Load WoRMS from tax/worms/, apply curated adjustments, and return it."""
    tax = Taxonomy.getTaxonomy('tax/worms/', 'worms')
    tax.smush()

    root = tax.taxon('Biota')
    root.rename('life')
    animals = tax.taxon('Animalia')
    animals.synonym('Metazoa')

    fix_basal(tax)

    # 2015-02-17 According to WoRMS web site.  Occurs in pg_1229
    scenedesmus = 'Scenedesmus communis'
    if tax.maybeTaxon(scenedesmus) != None:
        tax.taxon(scenedesmus).synonym('Scenedesmus caudata')

    # See NCBI
    crinoids = tax.taxon('Millericrinida')
    crinoids.extant()

    # Help to match up with IRMNG
    ochrophytes = tax.taxon('Ochrophyta')
    ochrophytes.synonym('Heterokontophyta')

    # Gracilimesus gorbunovi, pg_1783
    tax.smush()

    return tax
# counts number of taxa with rank=family in a given taxon from org.opentreeoflife.taxa import Taxonomy, Rank import argparse parser = argparse.ArgumentParser(description='load nexsons into postgres') parser.add_argument('taxonname', help='name of taxon to count' ) args = parser.parse_args() name = args.taxonname ott_path = '/Users/karen/Documents/opentreeoflife/data/ott/ott2.9draft12/' ott = Taxonomy.getTaxonomy(ott_path, 'ott') def count_families(taxon): count = 0 with open('families.txt','w') as f: for t in taxon.descendants(False): if t.rank == Rank.FAMILY_RANK: f.write("{n}\n".format(n=t.name)) count += 1 f.close() return count print "number families: ",count_families(ott.taxon(name))
def merge_sources(ott):
    """Align and merge every source taxonomy into the union *ott*.

    Sources are processed in this order: SILVA, Hibbett 2007, the Fungi part
    of Index Fungorum, the Lamiales study taxonomy, the Malacostraca,
    Cnidaria and Mollusca parts of WoRMS, NCBI, the rest of WoRMS, GBIF and
    IRMNG.  Reports on Hibbett 2007 orders and on the NCBI/SILVA mapping are
    produced along the way.  Nothing is returned.
    """

    # Genbank - this is a kludge to make sure it's in the dependencies list.
    # But eventually it ought to be handled in this file, not in the silva
    # import script.
    access_head('genbank')

    # SILVA
    silva = load_taxonomy('silva')
    adjustments.adjust_silva(silva)
    silva_to_ott = adjustments.align_silva(silva, ott)
    align_and_merge(silva_to_ott)

    # Hibbett 2007
    h2007 = Taxonomy.getTaxonomy('curation/h2007/tree.tre', 'h2007')
    adjustments.adjust_h2007(h2007)
    h2007_to_ott = ott.alignment(h2007)
    align_and_merge(h2007_to_ott)

    # Index Fungorum
    fungorum = load_taxonomy('fung')
    adjustments.adjust_fung(fungorum)
    (fungi, fungorum_sans_fungi) = split_taxonomy(fungorum, 'Fungi')
    align_and_merge(adjustments.align_fungi(fungi, ott))

    # Connect IF families to Hibbett 2007 orders
    adjustments.link_to_h2007(ott)

    # Look for orders that have no children in OTT
    report_on_h2007(h2007, h2007_to_ott, '#')

    # the non-Fungi from Index Fungorum get absorbed below

    lamiales = Taxonomy.getTaxonomy('curation/lamiales/', 'study713')
    adjustments.adjust_lamiales(lamiales)
    align_and_merge(adjustments.align_lamiales(lamiales, ott))

    # WoRMS
    # higher priority to Worms for Malacostraca, Cnidaria, Mollusca
    # so we split out
    # those clades from worms and absorb them before NCBI
    worms = load_taxonomy('worms')
    adjustments.adjust_worms(worms)

    # Malacostraca instead of Decapoda because M. is in the separation taxonomy
    (malacostraca, worms_sans_malacostraca) = split_taxonomy(worms, 'Malacostraca')
    align_and_merge(ott.alignment(malacostraca))
    (cnidaria, worms_sans_cnidaria) = split_taxonomy(worms_sans_malacostraca, 'Cnidaria')
    align_and_merge(ott.alignment(cnidaria))
    (mollusca, low_priority_worms) = split_taxonomy(worms_sans_cnidaria, 'Mollusca')
    align_and_merge(ott.alignment(mollusca))

    # NCBI
    ncbi = load_taxonomy('ncbi')
    adjustments.adjust_ncbi(ncbi)

    # analyzeOTUs sets flags on questionable taxa (hybrid, metagenomes,
    # etc) to allow the option of suppression downstream
    ncbi.analyzeOTUs()

    ncbi_to_ott = adjustments.align_ncbi(ncbi, silva, ott)
    align_and_merge(ncbi_to_ott)

    # Look for orders that have no children in OTT
    report_on_h2007(h2007, h2007_to_ott, '#')

    # Reporting
    # Get mapping from NCBI to OTT, derived via SILVA and Genbank.
    mappings = load_ncbi_to_silva(
        os.path.join(management.resource_path('silva'), 'ncbi_to_silva.tsv'),
        ncbi, silva, silva_to_ott)
    compare_ncbi_to_silva(mappings, silva_to_ott)

    # Low-priority WoRMS
    # This is suboptimal, but the names are confusing the division logic
    a = adjustments.align_worms(low_priority_worms, ott)
    align_and_merge(a)

    # The rest of Index Fungorum.  (Maybe not a good idea to use this.
    # These taxa are all in GBIF.)
    # align_and_merge(adjustments.align_fungorum_sans_fungi(fungorum_sans_fungi, ott))

    # GBIF
    gbif = load_taxonomy('gbif')
    adjustments.adjust_gbif(gbif)
    gbif_to_ott = adjustments.align_gbif(gbif, ott)
    align_and_merge(gbif_to_ott)

    # http://dx.doi.org/10.1016/j.ympev.2004.12.019 "Eccrinales
    # (Trichomycetes) are not fungi, but a clade of protists at the
    # early divergence of animals and fungi"
    debug_divisions('Enterobryus cingaloboli', gbif, ott)

    # Cylindrocarpon is now Neonectria
    cyl = gbif_to_ott.image(gbif.taxon('Cylindrocarpon', 'Ascomycota'))
    if cyl != None:
        cyl.setId('51754')

    # IRMNG
    irmng = load_taxonomy('irmng')
    adjustments.adjust_irmng(irmng)
    # The alignment is computed before hide_irmng runs and merged after it.
    a = adjustments.align_irmng(irmng, ott)
    hide_irmng(irmng)
    align_and_merge(a)

    # Misc fixups
    report_on_h2007(h2007, h2007_to_ott, '**')

    get_default_extinct_info_from_gbif(
        os.path.join(management.resource_path('gbif'), 'paleo.tsv'),
        gbif, gbif_to_ott)
def load_taxonomy(spec):
    """Load and return the taxonomy for source *spec*.

    The location comes from access_head(spec) and the id space from the
    source's "ott_idspace" property.
    """
    location = access_head(spec)
    idspace = management.get_property(spec, "ott_idspace")
    return Taxonomy.getTaxonomy(location, idspace)
def load_713():
    """Return the taxonomy loaded from tax/713/ in the 'study713' id space."""
    return Taxonomy.getTaxonomy('tax/713/', 'study713')
# Command line argument = file to write to
# Writes a row for every OTT id that
#  (a) occurs in tax/ott/,
#  (b) occurs as an OTU in phylesystem,
#  (c) is sourced only from IRMNG.

import csv, sys

from org.opentreeoflife.taxa import Taxonomy, Rank
from org.opentreeoflife.smasher import UnionTaxonomy

# The union is only used as a holder for the two preferred-id lists.
union = UnionTaxonomy.newTaxonomy('ott')
union.loadPreferredIds('ids_that_are_otus.tsv', False)
union.loadPreferredIds('ids_in_synthesis.tsv', True)

ott = Taxonomy.getTaxonomy('tax/ott/', 'ott')
#ott = Taxonomy.getTaxonomy('t/tax/aster/', 'ott')

# Python 2's csv module requires the output file to be opened in binary
# mode; text mode can corrupt line endings on some platforms.
with open(sys.argv[1], 'wb') as outfile:
    writer = csv.writer(outfile)
    writer.writerow(['irmng', 'ott', 'name', 'synthesis'])
    for taxon in ott.taxa():
        # if (taxon.rank == Rank.SPECIES_RANK and ...)
        # Keep only taxa whose single source is IRMNG.
        if (len(taxon.sourceIds) == 1 and
                taxon.sourceIds[0].prefix == 'irmng'):
            probe = union.importantIds.lookupId(taxon.id)
            if probe is not None:
                writer.writerow([taxon.sourceIds[0].id,
                                 taxon.id,
                                 taxon.name,
                                 'synthesis' if probe.inSynthesis else ''])
# Jython script to build the "model village" taxonomy. from org.opentreeoflife.taxa import Taxonomy from org.opentreeoflife.smasher import UnionTaxonomy from claim import Has_child # Create model taxonomy tax = UnionTaxonomy() # Establish homonym-resolution skeleton (not really used here) # skel = Taxonomy.getTaxonomy('tax/skel/', 'skel') # tax.setSkeleton(skel) # Add NCBI subset to the model taxonomy ncbi = Taxonomy.getTaxonomy('t/tax/ncbi_aster/') # analyzeOTUs sets flags on questionable taxa ("unclassified" and so on) # to allow the option of suppression downstream ncbi.analyzeOTUs() tax.absorb(ncbi) # Add GBIF subset fo the model taxonomy gbif = Taxonomy.getTaxonomy('t/tax/gbif_aster/') # analyzeMajorRankConflicts sets the "major_rank_conflict" flag when # intermediate ranks are missing (e.g. a family that's a child of a # class) gbif.analyzeMajorRankConflicts() tax.absorb(gbif) # "Old" patch system with tab-delimited files tax.edit('t/edits/')
def load_irmng():
    """Load IRMNG from tax/irmng/ and apply hand-curated repairs.

    After smushing, rank-conflict analysis and fix_basal, a series of
    individually documented prunes, merges and extinct/extant markings is
    applied.  Returns the resulting taxonomy.  Taxa pruned with this_source
    use a name defined elsewhere in this module.
    """
    irmng = Taxonomy.getTaxonomy('tax/irmng/', 'irmng')
    irmng.smush()
    irmng.analyzeMajorRankConflicts()

    fix_basal(irmng)

    irmng.taxon('Animalia').synonym('Metazoa')

    # JAR 2014-04-26 Flush all 'Unaccepted' taxa
    irmng.taxon('Unaccepted', 'life').prune(this_source)

    # Fixes

    # Neopithecus (extinct) occurs in two places.  Flush one, mark the other
    irmng.taxon('1413316').prune(this_source) #Neopithecus in Mammalia
    irmng.taxon('1413315').extinct() #Neopithecus in Primates (Pongidae)

    # RR #50
    # irmng.taxon('Saxo-Fridericia').rename('Saxofridericia')
    # irmng.taxon('Saxofridericia').absorb(irmng.taxon('Saxo-fridericia'))
    saxo = irmng.maybeTaxon('1063899')
    if saxo != None:
        saxo.absorb(irmng.taxon('1071613'))

    # Romina 2014-04-09
    # IRMNG has EIGHT different Trichodermas.  (Four are synonyms of other things.)
    # 1307461 = Trichoderma Persoon 1794, in Hypocreaceae
    # https://github.com/OpenTreeOfLife/reference-taxonomy/issues/86
    irmng.taxon('Hypocrea').absorb(irmng.taxon('1307461'))

    # JAR 2015-06-28
    # The synonym Ochrothallus multipetalus -> Niemeyera multipetala
    # is no good; it interferes with correct processing of Ochrothallus
    # multipetalus.  We could remove the synonym, but instead remove its
    # target because no synonym-removal command is available.
    irmng.taxon('Niemeyera multipetala').prune(this_source)

    tip = irmng.taxon('Tipuloidea', 'Hemiptera') # irmng:1170022
    if tip != None:
        tip.prune("about:blank#this-homonym-is-causing-too-much-trouble")

    oph = irmng.taxon('Ophiurina', 'Ophiurinidae') # irmng:1346026
    if oph != None:
        oph.prune("about:blank#this-homonym-is-causing-too-much-trouble")

    # NCBI synonymizes Pelecypoda = Bivalvia
    irmng.taxon('Bivalvia').absorb(irmng.taxon('Pelecypoda')) # bogus order
    # hmm
    irmng.taxon('Bivalvia').extant()

    # This one was mapping to Blattodea, and making it extinct.
    # Caused me a couple of hours of grief.
    # My guess is it's because its unique child Sinogramma is in Blattodea in GBIF.
    # Wikipedia says it's paraphyletic.
    irmng.taxon('Blattoptera', 'Insecta').prune('https://en.wikipedia.org/wiki/Blattoptera')

    # 2015-07-25 Found while trying to figure out why Theraphosidae was marked extinct.
    # NCBI thinks that Theraphosidae and Aviculariidae are the same.
    irmng.taxon('Aviculariidae').extant()

    # 2015-07-25 Extra Dipteras are confusing new division logic.  Barren genus
    irmng.taxon('1323521').prune(this_source)

    # 2015-09-10 This one is unclassified (Diptera) and is leading to confusion with two other Steinias.
    irmng.taxon('1299622').prune(this_source)

    # 2015-09-11 https://github.com/OpenTreeOfLife/feedback/issues/74
    # Lymnea is a snail, not a shark
    irmng.taxon('1317416').prune(this_source)

    # 2015-10-12 JAR checked IRMNG online and this taxon (Ctenophora in Chelicerata) did not exist
    if irmng.maybeTaxon('1279363') != None:
        irmng.taxon('1279363').prune(this_source)

    return irmng
# Command line argument = file to write to
# Writes a row for every OTT id that
#  (a) occurs in tax/ott/,
#  (b) occurs as an OTU in phylesystem,
#  (c) is sourced only from IRMNG.

import csv, sys

from org.opentreeoflife.taxa import Taxonomy, Rank
from org.opentreeoflife.smasher import UnionTaxonomy

# The union is only used as a holder for the two preferred-id lists.
union = UnionTaxonomy.newTaxonomy('ott')
union.loadPreferredIds('ids_that_are_otus.tsv', False)
union.loadPreferredIds('ids_in_synthesis.tsv', True)

ott = Taxonomy.getTaxonomy('tax/ott/', 'ott')
#ott = Taxonomy.getTaxonomy('t/tax/aster/', 'ott')

# Python 2's csv module requires the output file to be opened in binary
# mode; text mode can corrupt line endings on some platforms.
with open(sys.argv[1], 'wb') as outfile:
    writer = csv.writer(outfile)
    writer.writerow(['irmng', 'ott', 'name', 'synthesis'])
    for taxon in ott.taxa():
        # if (taxon.rank == Rank.SPECIES_RANK and ...)
        # Keep only taxa whose single source is IRMNG.
        if (len(taxon.sourceIds) == 1 and
                taxon.sourceIds[0].prefix == 'irmng'):
            probe = union.importantIds.lookupId(taxon.id)
            if probe is not None:
                writer.writerow([
                    taxon.sourceIds[0].id, taxon.id, taxon.name,
                    'synthesis' if probe.inSynthesis else ''
                ])
def assemble():
    """Build the small 'aster' model taxonomy and dump it to t/tax/aster/.

    Merges the NCBI and GBIF subsets into a new 'ott' union, applies the
    edits in t/edits/, exercises a proclaimed property and the taxon
    creation and pruning calls, carries ids over from t/tax/prev_aster/,
    processes additions, assigns new ids, checks the result and writes it
    out.  Nothing is returned.
    """

    # Create model taxonomy
    tax = UnionTaxonomy.newTaxonomy('ott')

    for name in [
            'Pentaphragma ellipticum',
            'Lachnophyllum',
            'Sipolisia',
            'Cicerbita bourgaei',
            'Adenophora triphylla',
            'Artemisia vulgaris',
            'Carlina libanotica',
    ]:
        tax.watch(name)

    # Establish homonym-resolution skeleton (not really used here)
    # skel = Taxonomy.getTaxonomy('tax/skel/', 'skel')
    # tax.setSkeleton(skel)

    # Add NCBI subset to the model taxonomy
    ncbi = Taxonomy.getTaxonomy('t/tax/ncbi_aster/', 'ncbi')
    # analyzeOTUs sets flags on questionable taxa ("unclassified" and so on)
    # to allow the option of suppression downstream
    ncbi.analyzeOTUs()
    align_and_merge(tax.alignment(ncbi))

    # Add GBIF subset to the model taxonomy
    gbif = Taxonomy.getTaxonomy('t/tax/gbif_aster/', 'gbif')
    gbif.smush()
    # analyzeMajorRankConflicts sets the "major_rank_conflict" flag when
    # intermediate ranks are missing (e.g. a family that's a child of a
    # class)
    gbif.analyzeMajorRankConflicts()
    align_and_merge(tax.alignment(gbif))

    # "Old" patch system with tab-delimited files
    TsvEdits.edit(tax, 't/edits/')

    props = [has_parent(taxon('Phellinaceae'), taxon('Asterales'), 'test:1')]

    for prop in props:
        print proclaim(tax, prop)

    gen = tax.newTaxon("Opentreeia", "genus", "data:testing")
    gen.take(tax.newTaxon("Opentreeia sp. C", "species", "data:testing"))
    gen.take(tax.newTaxon("Opentreeia sp. D", "species", "data:testing"))

    # Example of referring to a taxon
    fam = tax.maybeTaxon("Phellinaceae")

    if fam != None:
        # Example of how you might add a genus to the taxonomy
        fam.take(gen)

    # Test deletion feature
    sp = tax.newTaxon("Opentreeia sp. C", "species", "data:testing")
    gen.take(sp)
    sp.prune("aster.py")

    # tax.loadPreferredIds('ids-that-are-otus.tsv')

    additions_repo_path = 't/feed/amendments/amendments-0'
    new_taxa_path = 't/new_taxa'

    # Assign identifiers to the taxa in the model taxonomy.  Identifiers
    # assigned in the previous version are carried over to this version.
    ids = Taxonomy.getTaxonomy('t/tax/prev_aster/', 'ott')
    tax.carryOverIds(ids) # performs alignment

    Addition.processAdditions(additions_repo_path, tax)

    # Deliberately disabled check; flip the condition to run it.
    if False:  # too slow for everyday testing purposes.
        print '-- Checking id list'
        assign_ids_from_list(tax, 'ott_id_list/by_qid.csv')

    tax.assignNewIds(new_taxa_path)

    tax.check()

    # Write the model taxonomy out to a set of files
    tax.dump('t/tax/aster/', '\t|\t')
small, big, small_tax.id, small_id) show_interloper(small_node, small_id, ott) else: print '** More than one taxon named %s is in %s' % (small, big) print ' ', small_nodes infile.close() def show_interloper(small_node, small_id, ott): if small_node != small_node.taxon(): print ' %s is a synonym for %s' % (small_node.name, small_node.taxon().name) probe = ott.lookupId(small_id) if probe != None: print ' Id %s belongs to %s' % (small_id, probe) else: print ' (There is no taxon with id %s)' % small_id if __name__ == '__main__': if len(sys.argv) == 3: inclusions = sys.argv[1] taxname = sys.argv[2] else: print 'ignoring supplied args', sys.argv inclusions = 'inclusions.csv' taxname = 'tax/ott/' check(inclusions, Taxonomy.getTaxonomy(taxname, 'ott'))
def assemble():
    """Build the small 'aster' model taxonomy and dump it to t/tax/aster/.

    Merges the NCBI and GBIF subsets into a new 'ott' union, applies the
    edits in t/edits/, exercises a proclaimed property and the taxon
    creation and pruning calls, carries ids over from t/tax/prev_aster/,
    processes additions, assigns new ids, checks the result and writes it
    out.  Nothing is returned.
    """

    # Create model taxonomy
    tax = UnionTaxonomy.newTaxonomy('ott')

    for name in ['Pentaphragma ellipticum',
                 'Lachnophyllum',
                 'Sipolisia',
                 'Cicerbita bourgaei',
                 'Adenophora triphylla',
                 'Artemisia vulgaris',
                 'Carlina libanotica',
                 ]:
        tax.watch(name)

    # Establish homonym-resolution skeleton (not really used here)
    # skel = Taxonomy.getTaxonomy('tax/skel/', 'skel')
    # tax.setSkeleton(skel)

    # Add NCBI subset to the model taxonomy
    ncbi = Taxonomy.getTaxonomy('t/tax/ncbi_aster/', 'ncbi')
    # analyzeOTUs sets flags on questionable taxa ("unclassified" and so on)
    # to allow the option of suppression downstream
    ncbi.analyzeOTUs()
    align_and_merge(tax.alignment(ncbi))

    # Add GBIF subset to the model taxonomy
    gbif = Taxonomy.getTaxonomy('t/tax/gbif_aster/', 'gbif')
    gbif.smush()
    # analyzeMajorRankConflicts sets the "major_rank_conflict" flag when
    # intermediate ranks are missing (e.g. a family that's a child of a
    # class)
    gbif.analyzeMajorRankConflicts()
    align_and_merge(tax.alignment(gbif))

    # "Old" patch system with tab-delimited files
    TsvEdits.edit(tax, 't/edits/')

    props = [
        has_parent(taxon('Phellinaceae'), taxon('Asterales'), 'test:1')
    ]

    for prop in props:
        print proclaim(tax, prop)

    gen = tax.newTaxon("Opentreeia", "genus", "data:testing")
    gen.take(tax.newTaxon("Opentreeia sp. C", "species", "data:testing"))
    gen.take(tax.newTaxon("Opentreeia sp. D", "species", "data:testing"))

    # Example of referring to a taxon
    fam = tax.maybeTaxon("Phellinaceae")

    if fam != None:
        # Example of how you might add a genus to the taxonomy
        fam.take(gen)

    # Test deletion feature
    sp = tax.newTaxon("Opentreeia sp. C", "species", "data:testing")
    gen.take(sp)
    sp.prune("aster.py")

    # tax.loadPreferredIds('ids-that-are-otus.tsv')

    additions_repo_path = 't/feed/amendments/amendments-0'
    new_taxa_path = 't/new_taxa'

    # Assign identifiers to the taxa in the model taxonomy.  Identifiers
    # assigned in the previous version are carried over to this version.
    ids = Taxonomy.getTaxonomy('t/tax/prev_aster/', 'ott')
    tax.carryOverIds(ids) # performs alignment

    Addition.processAdditions(additions_repo_path, tax)

    # Deliberately disabled check; flip the condition to run it.
    if False:  # too slow for everyday testing purposes.
        print '-- Checking id list'
        assign_ids_from_list(tax, 'ott_id_list/by_qid.csv')

    tax.assignNewIds(new_taxa_path)

    tax.check()

    # Write the model taxonomy out to a set of files
    tax.dump('t/tax/aster/', '\t|\t')
import sys

from org.opentreeoflife.taxa import Taxonomy
from org.opentreeoflife.smasher import AlignmentByName
from org.opentreeoflife.conflict import ConflictAnalysis

# Export the Ruggiero taxonomy as Newick text followed by a newline.
rug = Taxonomy.getTaxonomy('scratch/Ruggiero/', 'rug')

with open('scratch/Ruggiero.tre', 'w') as outfile:
    outfile.writelines([rug.toNewick(False), '\n'])
# Requires python.security.respectJavaAccessibility = false
# on java command line or in .jython

from org.opentreeoflife.taxa import Taxonomy
from org.opentreeoflife.smasher import UnionTaxonomy, HomonymReport

union = UnionTaxonomy()
skel = Taxonomy.getTaxonomy('tax/skel/', 'skel')
union.setSkeleton(skel)


def report(tax, tag):
    """Mark divisions on *tax* from the skeleton, then write its homonym
    report to reports/<tag>-homonym-report.tsv.
    """
    union.markDivisionsFromSkeleton(tax, skel)
    HomonymReport.homonymReport(tax, 'reports/' + tag + '-homonym-report.tsv')


# Manual switch: report on OTT only, or on each source taxonomy instead.
if True:
    ott = Taxonomy.getTaxonomy('tax/ott/', 'ott')
    report(ott, 'ott')
else:
    import taxonomies
    report(taxonomies.loadSilva(), 'silva')
    report(taxonomies.loadH2007(), 'h2007')
    # The 'worms' report used to be generated from loadFung(), so Index
    # Fungorum was reported twice and WoRMS never.
    # NOTE(review): loadWorms is assumed from the naming pattern of the
    # other loaders - confirm it exists in the taxonomies module.
    report(taxonomies.loadWorms(), 'worms')
    report(taxonomies.loadFung(), 'if')
    report(taxonomies.loadNcbi(), 'ncbi')
    report(taxonomies.loadGbif(), 'gbif')
    report(taxonomies.loadIrmng(), 'irmng')
import sys, os, csv

from org.opentreeoflife.taxa import Taxonomy, SourceTaxonomy, Taxon
from org.opentreeoflife.smasher import UnionTaxonomy

# Union taxonomy that the sources below are loaded for.
dwh = UnionTaxonomy.newTaxonomy('dwh')

#Use this to tell smasher what separation file to use
dwh.setSkeleton(Taxonomy.getTaxonomy('tax/separation/', 'separation'))

# Numbered list of sources:
# 1. trunk
# 2. ictv
# 3. IOC
# 4. ASW
# 5. ODO
# 6. BOM
# 7. ERE
# 8. ONY
# 9. EET
# 10. NCBI
# 11. WOR
# 12. CLP
# 13. COL

#use this to load the taxonomies
trunk = Taxonomy.getTaxonomy('t/tax/2018_12/dynamichierarchytrunk2018-11-21/', 'trunk')
ictv = Taxonomy.getTaxonomy(
    't/tax/2018_12/ICTV-virus_taxonomy-with-higherClassification/', 'ictv')
IOC = Taxonomy.getTaxonomy('t/tax/2018_12/ioc-birdlist/', 'IOC')
small = row[0] big = row[1] small_id = row[2] small_tax = ott.maybeTaxon(small_id) if small_tax == None: small_tax = ott.maybeTaxon(small) if small_tax == None: print '** No unique taxon with id %s or name %s' % (small_id, small) else: print '** %s is %s, not %s' % (small, small_tax.id, small_id) else: look = ott.maybeTaxon(small, big) if look == None: print '** %s=%s not under %s' % (small, small_id, big) small_tax.show() elif look != small_tax: print '** The %s that descends from %s is %s, not %s' % (small, big, look.id, small_id) if small_tax.isHidden(): print '%s (%s) is hidden' % (small, small_id) infile.close() if __name__ == '__main__': taxname = 'tax/ott/' if len(sys.argv) > 1: taxname = sys.argv[1] else: print sys.argv check(Taxonomy.getTaxonomy(taxname))
import sys, os, csv

from org.opentreeoflife.taxa import Taxonomy, SourceTaxonomy, Taxon
from org.opentreeoflife.smasher import UnionTaxonomy

# Union taxonomy that the sources below are loaded for.
dwh = UnionTaxonomy.newTaxonomy('dwh')

#Use this to tell smasher what separation file to use
dwh.setSkeleton(Taxonomy.getTaxonomy('tax/separation/', 'separation'))

# Numbered list of sources:
# 1. trunk
# 2. ictv
# 3. IOC
# 4. ASW
# 5. ODO
# 6. BOM
# 7. ERE
# 8. ONY
# 9. EET
# 10. NCBI
# 11. WOR
# 12. CLP
# 13. COL

#use this to load the taxonomies
trunk = Taxonomy.getTaxonomy('t/tax/2018_12/trunk/', 'trunk')
ictv = Taxonomy.getTaxonomy('t/tax/2018_12/ictv/', 'ictv')
IOC = Taxonomy.getTaxonomy('t/tax/2018_12/IOC/', 'IOC')
ASW = Taxonomy.getTaxonomy('t/tax/2018_12/ASW/', 'ASW')
ODO = Taxonomy.getTaxonomy('t/tax/2018_12/ODO/', 'ODO')
import sys

from org.opentreeoflife.taxa import Taxonomy
from org.opentreeoflife.smasher import AlignmentByName
from org.opentreeoflife.conflict import ConflictAnalysis

# Export the Ruggiero taxonomy as Newick text followed by a newline.
rug = Taxonomy.getTaxonomy('scratch/Ruggiero/', 'rug')

with open('scratch/Ruggiero.tre', 'w') as outfile:
    outfile.writelines([rug.toNewick(False), '\n'])
# One-off script prepared to provide data to David Hibbett and Romina Gazis. # Lists numbers of species in each fungal order. from org.opentreeoflife.taxa import Taxonomy import csv, sys from taxonomies import load_fung, load_ncbi, load_gbif, load_irmng taxonomies = [('fung', load_fung(), 'Index Fungorum'), ('ncbi', load_ncbi(), 'NCBI'), ('gbif', load_gbif(), 'GBIF'), ('irmng', load_irmng(), 'IRMNG'), ('ott', Taxonomy.getTaxonomy('tax/ott/'), 'OTT 2.9'), ] def main(): infile = open('order-counts-orders.csv', 'r') reader = csv.reader(infile) reader.next() #header row taxa = ['Fungi'] for tuple in reader: taxa.append(tuple[0]) infile.close() write_counts(taxa) def write_counts(taxa): outfile = open('order-counts.csv', 'w') writer = csv.writer(outfile) header = ['order'] for (name, taxonomy, label) in taxonomies: header += [label + ' bin', label + ' sp', label + ' tip']
small_node = small_nodes[0] small_tax = small_node.taxon() if small_id != '' and small_tax != small_id_tax: print '** The id of %s in %s is %s (expected %s)' % (small, big, small_tax.id, small_id) show_interloper(small_node, small_id, ott) else: print '** More than one taxon named %s is in %s' % (small, big) print ' ', small_nodes infile.close() def show_interloper(small_node, small_id, ott): if small_node != small_node.taxon(): print ' %s is a synonym for %s' % (small_node.name, small_node.taxon().name) probe = ott.lookupId(small_id) if probe != None: print ' Id %s belongs to %s' % (small_id, probe) else: print ' (There is no taxon with id %s)' % small_id if __name__ == '__main__': if len(sys.argv) == 3: inclusions = sys.argv[1] taxname = sys.argv[2] else: print 'ignoring supplied args', sys.argv inclusions = 'inclusions.csv' taxname = 'tax/ott/' check(inclusions, Taxonomy.getTaxonomy(taxname, 'ott'))
def create_ott(ott_spec):
    """Build the OTT union taxonomy, write it out, and return it.

    ott_spec -- passed to management.source_path to obtain the dump
        location, and to record_ott_sources after the dump.

    The process exits with status 1 when the amendments clone directory is
    missing.  The names names_of_interest and new_taxa_path are read from
    outside this function.
    """

    # Fail fast
    additions_clone_path = os.path.join(access_head('amendments'), 'amendments-1')
    if not os.path.isdir(additions_clone_path):
        print '# cannot find', additions_clone_path
        sys.exit(1)

    # Opened only to verify the id list is readable; the handle is unused.
    with open(os.path.join(access_head('idlist'), 'by_qid.csv'), 'r') as infile:
        print '# can access idlist'

    ott_path = management.source_path(ott_spec)

    ott = UnionTaxonomy.newTaxonomy('ott')

    # Would be nice if there were tests for all of these...
    for name in names_of_interest:
        ott.eventLogger.namesOfInterest.add(name)

    ott.setSkeleton(Taxonomy.getTaxonomy('curation/separation/', 'separation'))

    # These are particularly hard cases; create alignment targets up front
    adjustments.deal_with_polysemies(ott)

    # Align and merge each source in sequence
    merge_sources(ott)

    # "Old" patch system
    TsvEdits.edit(ott, 'curation/edits/')

    # consider try: ... except: print '**** Exception in patch_ott'
    amendments.patch_ott(ott)

    # End of topology changes.  Now assign ids.
    retain_ids(ott,
               access_source('ott-PREVIOUS'),
               os.path.join(access_head('idlist'), 'by_qid.csv'))

    # Apply the additions (which already have ids assigned).
    # This has to happen *after* ids are assigned, since additions use OTT
    # ids to identify parents.
    print '-- Processing additions --'
    Addition.processAdditions(additions_clone_path, ott)

    # Mint ids for new nodes
    print '-- Minting new ids --'
    ott.assignNewIds(new_taxa_path)

    # Remove all trees but the largest (or make them life incertae sedis)
    ott.deforestate()

    # data structure integrity checks
    ott.check()

    # For deprecated id report (dump)
    ott.loadPreferredIds('ids_that_are_otus.tsv', False)
    ott.loadPreferredIds('ids_in_synthesis.tsv', True)

    ott.dump(ott_path)

    record_ott_sources(ott_spec)

    return ott
def create_ott():
    """Build the OTT union taxonomy by absorbing each source, then assign ids.

    Sources are absorbed in this order: SILVA, Hibbett 2007, Fungi from
    Index Fungorum, Lamiales, Malacostraca from WoRMS, NCBI, the rest of
    WoRMS, the rest of Index Fungorum, GBIF and IRMNG.  After patching and
    deforestation, a set of hand-picked ids is forced and the remaining ids
    are assigned from tax/prev_ott/.  Returns the union taxonomy.
    """

    ott = UnionTaxonomy.newTaxonomy()

    # There ought to be tests for all of these...
    for name in names_of_interest:
        ott.eventlogger.namesOfInterest.add(name)

    # When lumping, prefer to use ids that have been used in OTU matching
    # This list could be used for all sorts of purposes...
    ott.loadPreferredIds('ids-that-are-otus.tsv', False)
    ott.loadPreferredIds('ids-in-synthesis.tsv', True)

    ott.setSkeleton(Taxonomy.getTaxonomy('tax/skel/', 'skel'))

    silva = prepare_silva(ott)
    ott.absorb(silva)
    check_invariants(ott)

    h2007 = prepare_h2007(ott)
    ott.absorb(h2007)

    (fungi, fungorum_sans_fungi) = prepare_fungorum(ott)
    ott.absorb(fungi)
    check_invariants(ott)

    # the non-Fungi from Index Fungorum get absorbed below

    lamiales = prepare_lamiales(ott)
    ott.absorb(lamiales)

    (malacostraca, worms_sans_malacostraca) = prepare_worms(ott)
    ott.absorb(malacostraca)

    ncbi = prepare_ncbi(ott)
    align_ncbi_to_silva(ncbi, silva, ott)
    ott.absorb(ncbi)
    check_invariants(ott)

    ott.absorb(worms_sans_malacostraca)

    ott.absorb(fungorum_sans_fungi)

    gbif = prepare_gbif(ott)
    ott.absorb(gbif)

    irmng = prepare_irmng(ott)
    ott.absorb(irmng)

    taxonomies.link_to_h2007(ott)

    get_default_extinct_info_from_gbif(gbif, ott)

    check_invariants(ott)
    # consider try: ... except: print '**** Exception in patch_ott'
    patch_ott(ott)

    # Experimental...
    unextinct_ncbi(ncbi, ott)

    # Remove all trees but the largest (or make them life incertae sedis)
    ott.deforestate()

    # -----------------------------------------------------------------------------
    # OTT id assignment

    # Force some id assignments... will try to automate this in the future.
    # Most of these come from looking at the otu-deprecated.tsv file after a
    # series of smasher runs.

    # Each entry is (name, second argument to ott.taxon, id to force).
    for (inf, sup, id) in [
        ('Tipuloidea', 'Diptera', '722875'),
        ('Saccharomycetes', 'Saccharomycotina', '989999'),
        ('Phaeosphaeria', 'Ascomycota', '5486272'),
        ('Synedra acus','Eukaryota','992764'),
        ('Epiphloea','Halymeniaceae','5342325'),
        ('Hessea','Archaeplastida','600099'),
        ('Morganella','Arthropoda','6400'),
        ('Rhynchonelloidea','Rhynchonellidae','5316010'),
        ('Epiphloea', 'Lichinales', '5342482'),
        ('Morganella', 'Fungi', '973932'),
        ('Parmeliaceae', 'Lecanorales', '305904'),
    ]:
        tax = ott.taxon(inf, sup)
        if tax != None:
            tax.setId(id)

    ott.taxonThatContains('Rhynchonelloidea', 'Sphenarina').setId('795939') # NCBI

    # Force the ids listed in ncbi_assignments_list onto the union nodes
    # that the given NCBI taxa map to, reporting any that cannot be found.
    for (ncbi_id, ott_id, name) in ncbi_assignments_list:
        n = ncbi.maybeTaxon(ncbi_id)
        if n != None:
            im = ott.image(n)
            if im != None:
                im.setId(ott_id)
            else:
                print '** NCBI %s not mapped - %s' % (ncbi_id, name)
        else:
            print '** No NCBI taxon %s - %s' % (ncbi_id, name)

    # Cylindrocarpon is now Neonectria
    ott.image(gbif.taxon('2563163')).setId('51754')

    # Foo
    trich = fungi.maybeTaxon('Trichosporon')
    if trich != None:
        ott.image(trich).setId('364222')

    #ott.image(fungi.taxon('11060')).setId('4107132') #Cryptococcus - a total mess

    # Assign OTT ids to taxa that don't have them, re-using old ids when possible
    ids = Taxonomy.getTaxonomy('tax/prev_ott/')

    # Assign old ids to nodes in the new version
    ott.assignIds(ids)

    report_on_h2007(h2007, ott)

    return ott
# Requires python.security.respectJavaAccessibility = false
# on java command line or in .jython

from org.opentreeoflife.taxa import Taxonomy
from org.opentreeoflife.smasher import UnionTaxonomy, HomonymReport

union = UnionTaxonomy()
skel = Taxonomy.getTaxonomy('tax/skel/', 'skel')
union.setSkeleton(skel)


def report(tax, tag):
    """Mark divisions on *tax* from the skeleton, then write its homonym
    report to reports/<tag>-homonym-report.tsv.
    """
    union.markDivisionsFromSkeleton(tax, skel)
    HomonymReport.homonymReport(tax, 'reports/' + tag + '-homonym-report.tsv')


# Manual switch: report on OTT only, or on each source taxonomy instead.
if True:
    ott = Taxonomy.getTaxonomy('tax/ott/', 'ott')
    report(ott, 'ott')
else:
    import taxonomies
    report(taxonomies.loadSilva(), 'silva')
    report(taxonomies.loadH2007(), 'h2007')
    # The 'worms' report used to be generated from loadFung(), so Index
    # Fungorum was reported twice and WoRMS never.
    # NOTE(review): loadWorms is assumed from the naming pattern of the
    # other loaders - confirm it exists in the taxonomies module.
    report(taxonomies.loadWorms(), 'worms')
    report(taxonomies.loadFung(), 'if')
    report(taxonomies.loadNcbi(), 'ncbi')
    report(taxonomies.loadGbif(), 'gbif')
    report(taxonomies.loadIrmng(), 'irmng')
# counts number of taxa with rank=family in a given taxon from org.opentreeoflife.taxa import Taxonomy, Rank import argparse parser = argparse.ArgumentParser(description='load nexsons into postgres') parser.add_argument('taxonname', help='name of taxon to count') args = parser.parse_args() name = args.taxonname ott_path = '/Users/karen/Documents/opentreeoflife/data/ott/ott2.9draft12/' ott = Taxonomy.getTaxonomy(ott_path, 'ott') def count_families(taxon): count = 0 with open('families.txt', 'w') as f: for t in taxon.descendants(False): if t.rank == Rank.FAMILY_RANK: f.write("{n}\n".format(n=t.name)) count += 1 f.close() return count print "number families: ", count_families(ott.taxon(name))