예제 #1
0
def test():
    """Exercise the claim machinery on a tiny three-taxon taxonomy.

    A fresh taxonomy holding 'Mouse', 'Dog' and 'Mammal' is created,
    ``new_claims`` are applied to it, and then both ``expectations`` and
    ``new_claims`` are checked with ``test_claims``.  Finally
    ``find_surprises`` is run over ``surprises``.

    Returns:
        The combined outcome of the two ``test_claims`` calls: the result
        for ``expectations`` when that one is falsy, otherwise the result
        for ``new_claims``.  (Presumably both are booleans - confirm
        against ``test_claims``.)
    """
    tax = Taxonomy.newTaxonomy()
    tax.newTaxon('Mouse', 'species', 'about:blank')
    tax.newTaxon('Dog', 'species', 'about:blank')
    tax.newTaxon('Mammal', 'class', 'about:blank')

    # Claims that are applied to the taxonomy before anything is checked.
    new_claims = [
        Has_child('Mammal', 'Mouse', 'about:blank'),
        Whether_same('Mouse', 'Mus', True),
        Whether_same('Muus', 'Mouse', True),
    ]
    # Claims that should hold once new_claims have been made.
    expectations = [
        Has_child('Mammal', 'Mouse'),
        Has_child('Mammal', With_ancestor('Mouse', 'Mammal')),
        Has_child(With_descendant('Mammal', 'Mouse'), 'Mouse'),
        Whether_same('Mammal', 'Mammal', True),
        Whether_same('Mammal', 'Meemmal', False),
        Whether_same('Mouse', 'Mus', True),
        Whether_same('Mus', 'Mouse', True),
        Whether_same('Mus', 'Horse', False),
    ]
    # Claims handed to find_surprises.
    surprises = [
        Has_child('Mouse', 'Mammal'),
        Has_child('Mammal', 'Dog'),  # fails
        Whether_same('Mammal', 'Meemmal', True),
        Whether_same('Mammal', 'Mammal', False),
    ]
    make_claims(tax, new_claims)

    # Both checks always run.  Their results are kept separately so that a
    # failure among the expectations is not overwritten by the second check.
    expectations_ok = test_claims(tax, expectations)
    claims_ok = test_claims(tax, new_claims)
    find_surprises(tax, surprises)
    return expectations_ok and claims_ok
예제 #2
0
def test():
    """Exercise the claim machinery on a tiny three-taxon taxonomy.

    A fresh taxonomy holding 'Mouse', 'Dog' and 'Mammal' is created,
    ``new_claims`` are applied to it, and then both ``expectations`` and
    ``new_claims`` are checked with ``test_claims``.  Finally
    ``find_surprises`` is run over ``surprises``.

    Returns:
        The combined outcome of the two ``test_claims`` calls: the result
        for ``expectations`` when that one is falsy, otherwise the result
        for ``new_claims``.  (Presumably both are booleans - confirm
        against ``test_claims``.)
    """
    tax = Taxonomy.newTaxonomy()
    tax.newTaxon('Mouse', 'species', 'about:blank')
    tax.newTaxon('Dog', 'species', 'about:blank')
    tax.newTaxon('Mammal', 'class', 'about:blank')

    # Claims that are applied to the taxonomy before anything is checked.
    new_claims = [
        Has_child('Mammal', 'Mouse', 'about:blank'),
        Whether_same('Mouse', 'Mus', True),
        Whether_same('Muus', 'Mouse', True),
    ]
    # Claims that should hold once new_claims have been made.
    expectations = [
        Has_child('Mammal', 'Mouse'),
        Has_child('Mammal', With_ancestor('Mouse', 'Mammal')),
        Has_child(With_descendant('Mammal', 'Mouse'), 'Mouse'),
        Whether_same('Mammal', 'Mammal', True),
        Whether_same('Mammal', 'Meemmal', False),
        Whether_same('Mouse', 'Mus', True),
        Whether_same('Mus', 'Mouse', True),
        Whether_same('Mus', 'Horse', False),
    ]
    # Claims handed to find_surprises.
    surprises = [
        Has_child('Mouse', 'Mammal'),
        Has_child('Mammal', 'Dog'), # fails
        Whether_same('Mammal', 'Meemmal', True),
        Whether_same('Mammal', 'Mammal', False),
    ]
    make_claims(tax, new_claims)

    # Both checks always run.  Their results are kept separately so that a
    # failure among the expectations is not overwritten by the second check.
    expectations_ok = test_claims(tax, expectations)
    claims_ok = test_claims(tax, new_claims)
    find_surprises(tax, surprises)
    return expectations_ok and claims_ok
예제 #3
0
# Jython script to build the "model village" taxonomy.

from org.opentreeoflife.smasher import Taxonomy

# Create model taxonomy
# The statements below run in order at import/script time; the NCBI subset
# is absorbed before the GBIF subset, and the edits are applied last.
tax = Taxonomy.newTaxonomy()

# Establish homonym-resolution skeleton (not really used here)
skel = Taxonomy.getTaxonomy('tax/skel/', 'skel')
tax.setSkeleton(skel)

# Add NCBI subset to the model taxonomy
ncbi = Taxonomy.getTaxonomy('t/tax/ncbi_aster/')
# analyzeOTUs sets flags on questionable taxa ("unclassified" and so on)
#  to allow the option of suppression downstream
ncbi.analyzeOTUs()
tax.absorb(ncbi)

# Add GBIF subset to the model taxonomy
gbif = Taxonomy.getTaxonomy('t/tax/gbif_aster/')
# analyzeMajorRankConflicts sets the "major_rank_conflict" flag when
# intermediate ranks are missing (e.g. a family that's a child of a
# class)
gbif.analyzeMajorRankConflicts()
tax.absorb(gbif)

# "Old" patch system with tab-delimited files
tax.edit('t/edits/')

# Example of referring to a taxon
# (the result is bound to `fam` but not used again in the visible script)
fam = tax.taxon("Phellinaceae")
from org.opentreeoflife.smasher import Taxonomy
from org.opentreeoflife.smasher import Reportx
import taxonomies

# Module-level taxonomy used by report() below; it starts out empty and is
# given the skeleton loaded from 'tax/skel/'.
ott = Taxonomy.newTaxonomy()
skel = Taxonomy.getTaxonomy('tax/skel/', 'skel')
ott.setSkeleton(skel)


def report(tax, tag):
    """Mark and check the divisions of *tax*, then hand it to Reportx.

    ``tax`` is first passed to ``ott.markDivisions`` and
    ``taxonomies.checkDivisions``.  ``Reportx.report`` is then called with
    ``tax`` and the name formed by appending '-mrca-report.tsv' to ``tag``.
    """
    ott.markDivisions(tax)
    # Reportx.bogotypes(tax) is currently disabled.
    taxonomies.checkDivisions(tax)
    report_name = tag + '-mrca-report.tsv'
    Reportx.report(tax, report_name)


# Manual switch: with the literal True only the IRMNG report is produced.
# The else branch (reports for all the other sources) cannot run unless the
# condition is edited.
if True:
    report(taxonomies.loadIrmng(), 'irmng')
else:
    silva = taxonomies.loadSilva()
    # NOTE(review): presumably declares SILVA's Ctenophora (the one under
    # Coscinodiscophytina) distinct from the skeleton's Ctenophora - confirm.
    ott.notSame(silva.taxon('Ctenophora', 'Coscinodiscophytina'),
                skel.taxon('Ctenophora'))
    report(silva, 'silva')
    report(taxonomies.loadH2007(), 'h2007')
    report(taxonomies.loadFung(), 'if')
    report(taxonomies.loadNcbi(), 'ncbi')
    report(taxonomies.loadGbif(), 'gbif')
    report(taxonomies.loadIrmng(), 'irmng')
    report(taxonomies.loadOtt(), 'ott')
예제 #5
0
# Jython script to build the Open Tree reference taxonomy
# coding=utf-8

# Unless specified otherwise issues are in the reference-taxonomy repo:
# https://github.com/OpenTreeOfLife/reference-taxonomy/issues/...

import sys

from org.opentreeoflife.smasher import Taxonomy
import taxonomies
sys.path.append("feed/misc/")
from chromista_spreadsheet import fixChromista

# Module-level reference taxonomy under construction; it starts out empty
# and is given the skeleton loaded from 'tax/skel/'.
ott = Taxonomy.newTaxonomy()
skel = Taxonomy.getTaxonomy('tax/skel/', 'skel')
ott.setSkeleton(skel)

# ----- SILVA microbial taxonomy -----
def doSilva():
	"""Load the SILVA taxonomy and rename its parent/child homonyms."""

	silva = taxonomies.loadSilva()

	# - Deal with parent/child homonyms in SILVA -
	# The choices are arbitrary; they remove ambiguities that would show up
	# later when NCBI gets merged.  (If a homonym were retained, the merge
	# algorithm would have no way to choose between the two, would refuse
	# to match either, and would then create a third homonym.)
	#
	# Each entry is (name, context, new name).  The entries are applied in
	# list order, and the second one depends on the first: it looks up its
	# taxon using the name that the first entry has just assigned.
	homonym_renames = [
		('Intramacronucleata', 'Intramacronucleata', 'Intramacronucleata inf.'),
		('Spirotrichea', 'Intramacronucleata inf.', 'Spirotrichea inf.'),
		('Cyanobacteria', 'Bacteria', 'Cyanobacteria sup.'),
	]
	for (old_name, context, new_name) in homonym_renames:
		silva.taxon(old_name, context).rename(new_name)
예제 #6
0
# Jython script to build the "model village" taxonomy.

from org.opentreeoflife.smasher import Taxonomy

# Create model taxonomy
# The statements below run in order at import/script time; the NCBI subset
# is absorbed before the GBIF subset, and the edits are applied last.
tax = Taxonomy.newTaxonomy()

# Establish homonym-resolution skeleton (not really used here)
skel = Taxonomy.getTaxonomy('tax/skel/', 'skel')
tax.setSkeleton(skel)


# Add NCBI subset to the model taxonomy
ncbi = Taxonomy.getTaxonomy('t/tax/ncbi_aster/')
# analyzeOTUs sets flags on questionable taxa ("unclassified" and so on)
#  to allow the option of suppression downstream
ncbi.analyzeOTUs()
tax.absorb(ncbi)

# Add GBIF subset to the model taxonomy
gbif = Taxonomy.getTaxonomy('t/tax/gbif_aster/')
# analyzeMajorRankConflicts sets the "major_rank_conflict" flag when
# intermediate ranks are missing (e.g. a family that's a child of a
# class)
gbif.analyzeMajorRankConflicts()
tax.absorb(gbif)

# "Old" patch system with tab-delimited files
tax.edit('t/edits/')

# Example of referring to a taxon
예제 #7
0
def create_ott():
    """Assemble the OTT taxonomy from its sources and assign ids to it.

    Starting from an empty taxonomy, the prepared sources are absorbed in
    this order: silva, h2007, fungi, lamiales, malacostraca, ncbi (after
    align_ncbi_to_silva), worms_sans_malacostraca, fungorum_sans_fungi,
    gbif and irmng.  The result is then patched and deforestated, a number
    of ids are forced by hand, and the remaining ids are assigned from the
    taxonomy loaded from 'tax/prev_ott/'.

    Returns:
        The taxonomy object that was built.
    """

    ott = Taxonomy.newTaxonomy()

    # There ought to be tests for all of these...

    for name in names_of_interest:
        ott.namesOfInterest.add(name)

    # When lumping, prefer to use ids that have been used in OTU matching
    # This list could be used for all sorts of purposes...
    ott.loadPreferredIds('ids-that-are-otus.tsv', False)
    ott.loadPreferredIds('ids-in-synthesis.tsv', True)

    ott.setSkeleton(Taxonomy.getTaxonomy('tax/skel/', 'skel'))

    silva = prepare_silva(ott)
    ott.absorb(silva)
    check_invariants(ott)

    h2007 = prepare_h2007(ott)
    ott.absorb(h2007)

    (fungi, fungorum_sans_fungi) = prepare_fungorum(ott)
    ott.absorb(fungi)
    check_invariants(ott)

    # the non-Fungi from Index Fungorum get absorbed below

    lamiales = prepare_lamiales(ott)
    ott.absorb(lamiales)

    (malacostraca, worms_sans_malacostraca) = prepare_worms(ott)
    ott.absorb(malacostraca)

    ncbi = prepare_ncbi(ott)
    align_ncbi_to_silva(ncbi, silva, ott)
    ott.absorb(ncbi)
    check_invariants(ott)

    ott.absorb(worms_sans_malacostraca)

    ott.absorb(fungorum_sans_fungi)

    gbif = prepare_gbif(ott)
    ott.absorb(gbif)

    irmng = prepare_irmng(ott)
    ott.absorb(irmng)

    taxonomies.link_to_h2007(ott)

    get_default_extinct_info_from_gbif(gbif, ott)

    check_invariants(ott)
    # consider try: ... except: print '**** Exception in patch_ott'
    patch_ott(ott)

    # Experimental...
    unextinct_ncbi(ncbi, ott)

    # Remove all trees but the largest (or make them life incertae sedis)
    ott.deforestate()

    # -----------------------------------------------------------------------------
    # OTT id assignment

    # Force some id assignments... will try to automate this in the future.
    # Most of these come from looking at the otu-deprecated.tsv file after a
    # series of smasher runs.

    # Each entry is (taxon name, name of a containing taxon, id to force).
    # Entries whose taxon cannot be found are skipped silently.
    for (inf, sup, forced_id) in [
            ('Tipuloidea', 'Diptera', '722875'),
            ('Saccharomycetes', 'Saccharomycotina', '989999'),
            ('Phaeosphaeria', 'Ascomycota', '5486272'),
            ('Synedra acus', 'Eukaryota', '992764'),
            ('Epiphloea', 'Halymeniaceae', '5342325'),
            ('Hessea', 'Archaeplastida', '600099'),
            ('Morganella', 'Arthropoda', '6400'),
            ('Rhynchonelloidea', 'Rhynchonellidae', '5316010'),
            ('Epiphloea', 'Lichinales', '5342482'),
            ('Morganella', 'Fungi', '973932'),
            ('Parmeliaceae', 'Lecanorales', '305904'),
    ]:
        node = ott.taxon(inf, sup)
        if node is not None:
            node.setId(forced_id)

    ott.taxonThatContains('Rhynchonelloidea', 'Sphenarina').setId('795939') # NCBI

    # Force the ids listed in ncbi_assignments_list onto the OTT images of
    # the corresponding NCBI taxa, reporting every entry that cannot be
    # applied.  The single-argument parenthesised print behaves the same
    # under Python 2 / Jython and also parses under Python 3.
    for (ncbi_id, ott_id, name) in ncbi_assignments_list:
        n = ncbi.maybeTaxon(ncbi_id)
        if n is not None:
            im = ott.image(n)
            if im is not None:
                im.setId(ott_id)
            else:
                print('** NCBI %s not mapped - %s' % (ncbi_id, name))
        else:
            print('** No NCBI taxon %s - %s' % (ncbi_id, name))

    # Cylindrocarpon is now Neonectria
    ott.image(gbif.taxon('2563163')).setId('51754')

    # Foo
    trich = fungi.maybeTaxon('Trichosporon')
    if trich is not None:
        ott.image(trich).setId('364222')

    #ott.image(fungi.taxon('11060')).setId('4107132') #Cryptococcus - a total mess


    # Assign OTT ids to taxa that don't have them, re-using old ids when possible
    ids = Taxonomy.getTaxonomy('tax/prev_ott/')

    # Assign old ids to nodes in the new version
    ott.assignIds(ids)

    report_on_h2007(h2007, ott)

    return ott
예제 #8
0
def create_ott():
    """Assemble the OTT taxonomy from its sources and assign ids to it.

    Starting from an empty taxonomy, the prepared sources are absorbed in
    this order: silva, h2007, fungi, lamiales, malacostraca, ncbi (after
    align_ncbi_to_silva), worms_sans_malacostraca, fungorum_sans_fungi,
    gbif and irmng.  The result is then patched and deforestated, a number
    of ids are forced by hand, and the remaining ids are assigned from the
    taxonomy loaded from 'tax/prev_ott/'.

    Returns:
        The taxonomy object that was built.
    """

    ott = Taxonomy.newTaxonomy()

    # There ought to be tests for all of these...

    for name in names_of_interest:
        ott.namesOfInterest.add(name)

    # When lumping, prefer to use ids that have been used in OTU matching
    # This list could be used for all sorts of purposes...
    ott.loadPreferredIds('ids-that-are-otus.tsv', False)
    ott.loadPreferredIds('ids-in-synthesis.tsv', True)

    ott.setSkeleton(Taxonomy.getTaxonomy('tax/skel/', 'skel'))

    silva = prepare_silva(ott)
    ott.absorb(silva)
    check_invariants(ott)

    h2007 = prepare_h2007(ott)
    ott.absorb(h2007)

    (fungi, fungorum_sans_fungi) = prepare_fungorum(ott)
    ott.absorb(fungi)
    check_invariants(ott)

    # the non-Fungi from Index Fungorum get absorbed below

    lamiales = prepare_lamiales(ott)
    ott.absorb(lamiales)

    (malacostraca, worms_sans_malacostraca) = prepare_worms(ott)
    ott.absorb(malacostraca)

    ncbi = prepare_ncbi(ott)
    align_ncbi_to_silva(ncbi, silva, ott)
    ott.absorb(ncbi)
    check_invariants(ott)

    ott.absorb(worms_sans_malacostraca)

    ott.absorb(fungorum_sans_fungi)

    gbif = prepare_gbif(ott)
    ott.absorb(gbif)

    irmng = prepare_irmng(ott)
    ott.absorb(irmng)

    taxonomies.link_to_h2007(ott)

    get_default_extinct_info_from_gbif(gbif, ott)

    check_invariants(ott)
    # consider try: ... except: print '**** Exception in patch_ott'
    patch_ott(ott)

    # Experimental...
    unextinct_ncbi(ncbi, ott)

    # Remove all trees but the largest (or make them life incertae sedis)
    ott.deforestate()

    # -----------------------------------------------------------------------------
    # OTT id assignment

    # Force some id assignments... will try to automate this in the future.
    # Most of these come from looking at the otu-deprecated.tsv file after a
    # series of smasher runs.

    # Each entry is (taxon name, name of a containing taxon, id to force).
    # Entries whose taxon cannot be found are skipped silently.
    for (inf, sup, forced_id) in [
        ('Tipuloidea', 'Diptera', '722875'),
        ('Saccharomycetes', 'Saccharomycotina', '989999'),
        ('Phaeosphaeria', 'Ascomycota', '5486272'),
        ('Synedra acus', 'Eukaryota', '992764'),
        ('Epiphloea', 'Halymeniaceae', '5342325'),
        ('Hessea', 'Archaeplastida', '600099'),
        ('Morganella', 'Arthropoda', '6400'),
        ('Rhynchonelloidea', 'Rhynchonellidae', '5316010'),
        ('Epiphloea', 'Lichinales', '5342482'),
        ('Morganella', 'Fungi', '973932'),
        ('Parmeliaceae', 'Lecanorales', '305904'),
    ]:
        node = ott.taxon(inf, sup)
        if node is not None:
            node.setId(forced_id)

    ott.taxonThatContains('Rhynchonelloidea',
                          'Sphenarina').setId('795939')  # NCBI

    # Force the ids listed in ncbi_assignments_list onto the OTT images of
    # the corresponding NCBI taxa, reporting every entry that cannot be
    # applied.  The single-argument parenthesised print behaves the same
    # under Python 2 / Jython and also parses under Python 3.
    for (ncbi_id, ott_id, name) in ncbi_assignments_list:
        n = ncbi.maybeTaxon(ncbi_id)
        if n is not None:
            im = ott.image(n)
            if im is not None:
                im.setId(ott_id)
            else:
                print('** NCBI %s not mapped - %s' % (ncbi_id, name))
        else:
            print('** No NCBI taxon %s - %s' % (ncbi_id, name))

    # Cylindrocarpon is now Neonectria
    ott.image(gbif.taxon('2563163')).setId('51754')

    # Foo
    trich = fungi.maybeTaxon('Trichosporon')
    if trich is not None:
        ott.image(trich).setId('364222')

    #ott.image(fungi.taxon('11060')).setId('4107132') #Cryptococcus - a total mess

    # Assign OTT ids to taxa that don't have them, re-using old ids when possible
    ids = Taxonomy.getTaxonomy('tax/prev_ott/')

    # Assign old ids to nodes in the new version
    ott.assignIds(ids)

    report_on_h2007(h2007, ott)

    return ott