def doit():
    """Absorb the Ruggiero and OTT taxonomies into one union and dump it.

    Both source taxonomies are loaded first, then a new union taxonomy is
    created and absorbs them in order (Ruggiero, then OTT).  The result is
    written under 'scratch/compare_Ruggiero/'.
    """
    ruggiero = Taxonomy.getTaxonomy('scratch/Ruggiero/', 'rug')
    ott_source = Taxonomy.getTaxonomy('tax/ott/', 'ott')

    merged = UnionTaxonomy.newTaxonomy('ott')
    # Order matters: Ruggiero is absorbed before OTT.
    for source in (ruggiero, ott_source):
        merged.absorb(source)

    # Second argument is presumably the field separator -- confirm against
    # the dump() signature.
    merged.dump('scratch/compare_Ruggiero/', '\t')
def combine(t, s, bluster):
    """Align and merge taxonomies `t` and `s` into a new union taxonomy.

    `t` is aligned and merged with `blustery` set to 0.  `blustery` is then
    set to `bluster` before `s` is handled.  For `s`, every root is passed
    to alignTaxon() before the alignment as a whole is aligned and merged.

    Returns the union taxonomy after check() has been run on it.
    """
    union = UnionTaxonomy.newTaxonomy('union')

    # First source, with blustery set to 0.
    union.blustery = 0
    first_alignment = union.alignment(t)
    union.align(first_alignment)
    union.merge(first_alignment)

    # Second source, with the caller's blustery value.
    union.blustery = bluster
    second_alignment = union.alignment(s)
    for root_taxon in s.roots():
        second_alignment.alignTaxon(root_taxon)
    union.align(second_alignment)
    union.merge(second_alignment)

    union.check()
    return union
def combine(sep, t, s, bluster):
    """Merge `t` into a new union, then align (but do not merge) `s`.

    `sep` is installed as the union's skeleton.  `t` is aligned and merged
    with `blustery` set to 0; `blustery` is then set to `bluster`.  The
    lookup of 'a' in the union and in `s` is printed along the way.

    Note: the alignment for `s` is aligned and handed to debug_alignment()
    but is never merged, and check() is not called.

    Returns the union taxonomy.
    """
    union = UnionTaxonomy.newTaxonomy('union')
    union.blustery = 0
    union.setSkeleton(sep)

    first_alignment = union.alignment(t)
    union.align(first_alignment)
    union.merge(first_alignment)
    print(union.lookup('a'))

    union.blustery = bluster
    print(s.lookup('a'))

    second_alignment = union.alignment(s)
    for root_taxon in s.roots():
        second_alignment.alignTaxon(root_taxon)
    union.align(second_alignment)
    debug_alignment(second_alignment)

    return union
def create_ott(ott_spec):
    """Build the OTT union taxonomy, assign ids, dump it, and return it.

    Steps, in order: verify that the amendments clone and the id list are
    reachable; create the union and configure it (names of interest,
    separation skeleton, polysemy adjustments); merge the sources; apply
    edits and amendments; retain previous ids; process additions; mint new
    ids; deforestate; check; load preferred ids; dump; record sources.

    Exits the process with status 1 if the amendments clone directory is
    missing.
    """
    # Fail fast: both external resources are needed much later on.
    amendments_head = access_head('amendments')
    additions_clone_path = os.path.join(amendments_head, 'amendments-1')
    if not os.path.isdir(additions_clone_path):
        print('%s %s' % ('# cannot find', additions_clone_path))
        sys.exit(1)

    # Opening the id list is only a readability probe; nothing is read here.
    with open(os.path.join(access_head('idlist'), 'by_qid.csv'), 'r'):
        print('# can access idlist')

    ott_path = management.source_path(ott_spec)

    ott = UnionTaxonomy.newTaxonomy('ott')

    # Would be nice if there were tests for all of these...
    for watched_name in names_of_interest:
        ott.eventLogger.namesOfInterest.add(watched_name)

    separation = Taxonomy.getTaxonomy('curation/separation/', 'separation')
    ott.setSkeleton(separation)

    # These are particularly hard cases; create alignment targets up front
    adjustments.deal_with_polysemies(ott)

    # Align and merge each source in sequence
    merge_sources(ott)

    # "Old" patch system
    TsvEdits.edit(ott, 'curation/edits/')

    # consider try: ... except: print '**** Exception in patch_ott'
    amendments.patch_ott(ott)

    # End of topology changes.  Now assign ids.
    previous_ott = access_source('ott-PREVIOUS')
    id_list_path = os.path.join(access_head('idlist'), 'by_qid.csv')
    retain_ids(ott, previous_ott, id_list_path)

    # Apply the additions (which already have ids assigned).
    # This has to happen *after* ids are assigned, since additions use OTT
    # ids to identify parents.
    print('-- Processing additions --')
    Addition.processAdditions(additions_clone_path, ott)

    # Mint ids for new nodes
    print('-- Minting new ids --')
    ott.assignNewIds(new_taxa_path)

    # Remove all trees but the largest (or make them life incertae sedis)
    ott.deforestate()

    # Data structure integrity checks
    ott.check()

    # For deprecated id report (dump)
    ott.loadPreferredIds('ids_that_are_otus.tsv', False)
    ott.loadPreferredIds('ids_in_synthesis.tsv', True)

    ott.dump(ott_path)

    record_ott_sources(ott_spec)

    return ott
import sys, os, csv

from org.opentreeoflife.taxa import Taxonomy, SourceTaxonomy, Taxon
from org.opentreeoflife.smasher import UnionTaxonomy

# The union taxonomy that the sources below are meant to be combined into.
dwh = UnionTaxonomy.newTaxonomy('dwh')

# Tell smasher which separation taxonomy to use as the skeleton.
dwh.setSkeleton(Taxonomy.getTaxonomy('tax/separation/', 'separation'))

# Source taxonomies (presumably listed in the order they are to be merged
# -- confirm).  Only the first five are loaded in this part of the script.
# 1. trunk
# 2. ictv
# 3. IOC
# 4. ASW
# 5. ODO
# 6. BOM
# 7. ERE
# 8. ONY
# 9. EET
# 10. NCBI
# 11. WOR
# 12. CLP
# 13. COL

# Load each source taxonomy from its directory under t/tax/2018_12/.
trunk = Taxonomy.getTaxonomy('t/tax/2018_12/trunk/', 'trunk')
ictv = Taxonomy.getTaxonomy('t/tax/2018_12/ictv/', 'ictv')
IOC = Taxonomy.getTaxonomy('t/tax/2018_12/IOC/', 'IOC')
ASW = Taxonomy.getTaxonomy('t/tax/2018_12/ASW/', 'ASW')
ODO = Taxonomy.getTaxonomy('t/tax/2018_12/ODO/', 'ODO')
# Command line argument = file to write to
# Writes a row for every OTT id that
#   (a) occurs in tax/ott/,
#   (b) occurs as an OTU in phylesystem,
#   (c) is sourced only from IRMNG.

import csv
import sys

from org.opentreeoflife.taxa import Taxonomy, Rank
from org.opentreeoflife.smasher import UnionTaxonomy

# Fail fast: without this check a missing argument only surfaces as an
# IndexError after both taxonomies have been loaded.
if len(sys.argv) < 2:
    sys.exit('usage: %s OUTPUT_CSV' % sys.argv[0])

# This union is used only for its table of "important" ids, which is filled
# from the two preferred-id files.
union = UnionTaxonomy.newTaxonomy('ott')
union.loadPreferredIds('ids_that_are_otus.tsv', False)
union.loadPreferredIds('ids_in_synthesis.tsv', True)

ott = Taxonomy.getTaxonomy('tax/ott/', 'ott')
# Alternative (small test taxonomy):
#ott = Taxonomy.getTaxonomy('t/tax/aster/', 'ott')

with open(sys.argv[1], 'w') as outfile:
    writer = csv.writer(outfile)
    writer.writerow(['irmng', 'ott', 'name', 'synthesis'])
    for taxon in ott.taxa():
        # if (taxon.rank == Rank.SPECIES_RANK and ...)
        source_ids = taxon.sourceIds
        # Keep only taxa whose single source is IRMNG.
        if len(source_ids) != 1 or source_ids[0].prefix != 'irmng':
            continue
        # Keep only ids present in the preferred-id table.
        probe = union.importantIds.lookupId(taxon.id)
        if probe is None:
            continue
        writer.writerow([source_ids[0].id,
                         taxon.id,
                         taxon.name,
                         'synthesis' if probe.inSynthesis else ''])
# Command line argument = file to write to
# Writes a row for every OTT id that
#   (a) occurs in tax/ott/,
#   (b) occurs as an OTU in phylesystem,
#   (c) is sourced only from IRMNG.

import csv
import sys

from org.opentreeoflife.taxa import Taxonomy, Rank
from org.opentreeoflife.smasher import UnionTaxonomy

# Check the argument before the expensive loads below; otherwise a missing
# argument shows up late, as a bare IndexError.
if len(sys.argv) < 2:
    sys.exit('usage: %s OUTPUT_CSV' % sys.argv[0])

# The union exists only to hold the preferred-id table consulted in the loop.
union = UnionTaxonomy.newTaxonomy('ott')
union.loadPreferredIds('ids_that_are_otus.tsv', False)
union.loadPreferredIds('ids_in_synthesis.tsv', True)

ott = Taxonomy.getTaxonomy('tax/ott/', 'ott')
# Alternative (small test taxonomy):
#ott = Taxonomy.getTaxonomy('t/tax/aster/', 'ott')

with open(sys.argv[1], 'w') as outfile:
    writer = csv.writer(outfile)
    writer.writerow(['irmng', 'ott', 'name', 'synthesis'])
    for taxon in ott.taxa():
        # if (taxon.rank == Rank.SPECIES_RANK and ...)
        if (len(taxon.sourceIds) == 1 and
                taxon.sourceIds[0].prefix == 'irmng'):
            # Only ids found in the preferred-id table get a row.
            probe = union.importantIds.lookupId(taxon.id)
            if probe is not None:
                writer.writerow([
                    taxon.sourceIds[0].id,
                    taxon.id,
                    taxon.name,
                    'synthesis' if probe.inSynthesis else '',
                ])
def create_ott():
    """Assemble the OTT union taxonomy from its sources and assign ids.

    Sources are absorbed in a fixed order (SILVA, H2007, the fungi part of
    Index Fungorum, Lamiales, the Malacostraca part of WoRMS, NCBI, the rest
    of WoRMS, the rest of Index Fungorum, GBIF, IRMNG), with invariant
    checks in between.  After patching and deforestation, a number of ids
    are forced by hand and the remaining ids are carried over from the
    previous OTT version in 'tax/prev_ott/'.

    Returns the union taxonomy.
    """
    ott = UnionTaxonomy.newTaxonomy()

    # There ought to be tests for all of these...
    # NOTE(review): the attribute is spelled `eventlogger` here; confirm it
    # matches the UnionTaxonomy API in use.
    for name in names_of_interest:
        ott.eventlogger.namesOfInterest.add(name)

    # When lumping, prefer to use ids that have been used in OTU matching
    # This list could be used for all sorts of purposes...
    ott.loadPreferredIds('ids-that-are-otus.tsv', False)
    ott.loadPreferredIds('ids-in-synthesis.tsv', True)

    ott.setSkeleton(Taxonomy.getTaxonomy('tax/skel/', 'skel'))

    silva = prepare_silva(ott)
    ott.absorb(silva)
    check_invariants(ott)

    h2007 = prepare_h2007(ott)
    ott.absorb(h2007)

    (fungi, fungorum_sans_fungi) = prepare_fungorum(ott)
    ott.absorb(fungi)
    check_invariants(ott)

    # the non-Fungi from Index Fungorum get absorbed below

    lamiales = prepare_lamiales(ott)
    ott.absorb(lamiales)

    (malacostraca, worms_sans_malacostraca) = prepare_worms(ott)
    ott.absorb(malacostraca)

    ncbi = prepare_ncbi(ott)
    align_ncbi_to_silva(ncbi, silva, ott)
    ott.absorb(ncbi)
    check_invariants(ott)

    ott.absorb(worms_sans_malacostraca)

    ott.absorb(fungorum_sans_fungi)

    gbif = prepare_gbif(ott)
    ott.absorb(gbif)

    irmng = prepare_irmng(ott)
    ott.absorb(irmng)

    taxonomies.link_to_h2007(ott)

    get_default_extinct_info_from_gbif(gbif, ott)

    check_invariants(ott)
    # consider try: ... except: print '**** Exception in patch_ott'
    patch_ott(ott)

    # Experimental...
    unextinct_ncbi(ncbi, ott)

    # Remove all trees but the largest (or make them life incertae sedis)
    ott.deforestate()

    # -------------------------------------------------------------------------
    # OTT id assignment

    # Force some id assignments... will try to automate this in the future.
    # Most of these come from looking at the otu-deprecated.tsv file after a
    # series of smasher runs.
    # Each entry is (name, second argument to ott.taxon(), id to force);
    # the second name is presumably an ancestor used to disambiguate
    # homonyms -- confirm.
    for (inf, sup, forced_id) in [
            ('Tipuloidea', 'Diptera', '722875'),
            ('Saccharomycetes', 'Saccharomycotina', '989999'),
            ('Phaeosphaeria', 'Ascomycota', '5486272'),
            ('Synedra acus','Eukaryota','992764'),
            ('Epiphloea','Halymeniaceae','5342325'),
            ('Hessea','Archaeplastida','600099'),
            ('Morganella','Arthropoda','6400'),
            ('Rhynchonelloidea','Rhynchonellidae','5316010'),
            ('Epiphloea', 'Lichinales', '5342482'),
            ('Morganella', 'Fungi', '973932'),
            ('Parmeliaceae', 'Lecanorales', '305904'),
    ]:
        tax = ott.taxon(inf, sup)
        if tax is not None:
            tax.setId(forced_id)

    ott.taxonThatContains('Rhynchonelloidea', 'Sphenarina').setId('795939')

    # NCBI: force ids for specific NCBI taxa, reporting any that are missing
    # from NCBI or have no image in the union.
    for (ncbi_id, ott_id, name) in ncbi_assignments_list:
        n = ncbi.maybeTaxon(ncbi_id)
        if n is not None:
            im = ott.image(n)
            if im is not None:
                im.setId(ott_id)
            else:
                print('** NCBI %s not mapped - %s' % (ncbi_id, name))
        else:
            print('** No NCBI taxon %s - %s' % (ncbi_id, name))

    # Cylindrocarpon is now Neonectria
    ott.image(gbif.taxon('2563163')).setId('51754')

    # Trichosporon, if the fungi source has it
    trich = fungi.maybeTaxon('Trichosporon')
    if trich is not None:
        ott.image(trich).setId('364222')

    #ott.image(fungi.taxon('11060')).setId('4107132') #Cryptococcus - a total mess

    # Assign OTT ids to taxa that don't have them, re-using old ids when possible
    ids = Taxonomy.getTaxonomy('tax/prev_ott/')

    # Assign old ids to nodes in the new version
    ott.assignIds(ids)

    report_on_h2007(h2007, ott)

    return ott
def assemble():
    """Build the small Asterales test taxonomy and write it to t/tax/aster/.

    Merges the NCBI and GBIF subsets into a new union taxonomy, applies the
    tab-delimited edits, exercises taxon creation and pruning, carries ids
    over from the previous version, processes additions, mints new ids,
    checks the result and dumps it.  Returns nothing.
    """
    # Create model taxonomy
    tax = UnionTaxonomy.newTaxonomy('ott')

    watched_names = (
        'Pentaphragma ellipticum',
        'Lachnophyllum',
        'Sipolisia',
        'Cicerbita bourgaei',
        'Adenophora triphylla',
        'Artemisia vulgaris',
        'Carlina libanotica',
    )
    for name in watched_names:
        tax.watch(name)

    # Establish homonym-resolution skeleton (not really used here)
    # skel = Taxonomy.getTaxonomy('tax/skel/', 'skel')
    # tax.setSkeleton(skel)

    # Add NCBI subset to the model taxonomy
    ncbi = Taxonomy.getTaxonomy('t/tax/ncbi_aster/', 'ncbi')
    # analyzeOTUs sets flags on questionable taxa ("unclassified" and so on)
    # to allow the option of suppression downstream
    ncbi.analyzeOTUs()
    align_and_merge(tax.alignment(ncbi))

    # Add GBIF subset to the model taxonomy
    gbif = Taxonomy.getTaxonomy('t/tax/gbif_aster/', 'gbif')
    gbif.smush()
    # analyzeMajorRankConflicts sets the "major_rank_conflict" flag when
    # intermediate ranks are missing (e.g. a family that's a child of a
    # class)
    gbif.analyzeMajorRankConflicts()
    align_and_merge(tax.alignment(gbif))

    # "Old" patch system with tab-delimited files
    TsvEdits.edit(tax, 't/edits/')

    props = [
        has_parent(taxon('Phellinaceae'), taxon('Asterales'), 'test:1')
    ]

    for prop in props:
        print(proclaim(tax, prop))

    gen = tax.newTaxon("Opentreeia", "genus", "data:testing")
    gen.take(tax.newTaxon("Opentreeia sp. C", "species", "data:testing"))
    gen.take(tax.newTaxon("Opentreeia sp. D", "species", "data:testing"))

    # Example of referring to a taxon
    fam = tax.maybeTaxon("Phellinaceae")

    if fam is not None:
        # Example of how you might add a genus to the taxonomy
        fam.take(gen)

    # Test deletion feature: a second "Opentreeia sp. C" is created on
    # purpose, attached to the genus, and then pruned.
    sp = tax.newTaxon("Opentreeia sp. C", "species", "data:testing")
    gen.take(sp)
    sp.prune("aster.py")

    # tax.loadPreferredIds('ids-that-are-otus.tsv')

    additions_repo_path = 't/feed/amendments/amendments-0'
    new_taxa_path = 't/new_taxa'

    # Assign identifiers to the taxa in the model taxonomy.  Identifiers
    # assigned in the previous version are carried over to this version.
    ids = Taxonomy.getTaxonomy('t/tax/prev_aster/', 'ott')
    tax.carryOverIds(ids)  # performs alignment

    Addition.processAdditions(additions_repo_path, tax)

    if False:  # too slow for everyday testing purposes.
        print('-- Checking id list')
        assign_ids_from_list(tax, 'ott_id_list/by_qid.csv')

    tax.assignNewIds(new_taxa_path)

    tax.check()

    # Write the model taxonomy out to a set of files
    tax.dump('t/tax/aster/', '\t|\t')
def assemble():
    """Assemble the Asterales model taxonomy and dump it to t/tax/aster/.

    The NCBI and GBIF subsets are aligned and merged into a new union
    taxonomy, edits are applied, a few test taxa are created (one is pruned
    again), ids are carried over from the previous version, additions are
    processed, new ids are minted, and the checked result is written out.
    Returns nothing.
    """
    # Create model taxonomy
    tax = UnionTaxonomy.newTaxonomy('ott')

    names_to_watch = (
        'Pentaphragma ellipticum',
        'Lachnophyllum',
        'Sipolisia',
        'Cicerbita bourgaei',
        'Adenophora triphylla',
        'Artemisia vulgaris',
        'Carlina libanotica',
    )
    for name in names_to_watch:
        tax.watch(name)

    # Establish homonym-resolution skeleton (not really used here)
    # skel = Taxonomy.getTaxonomy('tax/skel/', 'skel')
    # tax.setSkeleton(skel)

    # Add NCBI subset to the model taxonomy
    ncbi = Taxonomy.getTaxonomy('t/tax/ncbi_aster/', 'ncbi')
    # analyzeOTUs sets flags on questionable taxa ("unclassified" and so on)
    # to allow the option of suppression downstream
    ncbi.analyzeOTUs()
    align_and_merge(tax.alignment(ncbi))

    # Add GBIF subset to the model taxonomy
    gbif = Taxonomy.getTaxonomy('t/tax/gbif_aster/', 'gbif')
    gbif.smush()
    # analyzeMajorRankConflicts sets the "major_rank_conflict" flag when
    # intermediate ranks are missing (e.g. a family that's a child of a
    # class)
    gbif.analyzeMajorRankConflicts()
    align_and_merge(tax.alignment(gbif))

    # "Old" patch system with tab-delimited files
    TsvEdits.edit(tax, 't/edits/')

    props = [has_parent(taxon('Phellinaceae'), taxon('Asterales'), 'test:1')]

    for prop in props:
        print(proclaim(tax, prop))

    gen = tax.newTaxon("Opentreeia", "genus", "data:testing")
    gen.take(tax.newTaxon("Opentreeia sp. C", "species", "data:testing"))
    gen.take(tax.newTaxon("Opentreeia sp. D", "species", "data:testing"))

    # Example of referring to a taxon
    fam = tax.maybeTaxon("Phellinaceae")

    if fam is not None:
        # Example of how you might add a genus to the taxonomy
        fam.take(gen)

    # Test deletion feature: create another "Opentreeia sp. C", attach it
    # to the genus, then prune it.
    sp = tax.newTaxon("Opentreeia sp. C", "species", "data:testing")
    gen.take(sp)
    sp.prune("aster.py")

    # tax.loadPreferredIds('ids-that-are-otus.tsv')

    additions_repo_path = 't/feed/amendments/amendments-0'
    new_taxa_path = 't/new_taxa'

    # Assign identifiers to the taxa in the model taxonomy.  Identifiers
    # assigned in the previous version are carried over to this version.
    ids = Taxonomy.getTaxonomy('t/tax/prev_aster/', 'ott')
    tax.carryOverIds(ids)  # performs alignment

    Addition.processAdditions(additions_repo_path, tax)

    if False:  # too slow for everyday testing purposes.
        print('-- Checking id list')
        assign_ids_from_list(tax, 'ott_id_list/by_qid.csv')

    tax.assignNewIds(new_taxa_path)

    tax.check()

    # Write the model taxonomy out to a set of files
    tax.dump('t/tax/aster/', '\t|\t')