def doit():
    """Merge the Ruggiero and OTT taxonomies and dump the union.

    Loads 'scratch/Ruggiero/' (tag 'rug') and 'tax/ott/' (tag 'ott'),
    absorbs them, in that order, into a new union taxonomy named 'ott',
    and dumps the result to 'scratch/compare_Ruggiero/' with a tab
    character as the second argument to dump().
    """
    # Both sources are loaded before the union is created, as before.
    sources = [
        Taxonomy.getTaxonomy('scratch/Ruggiero/', 'rug'),
        Taxonomy.getTaxonomy('tax/ott/', 'ott'),
    ]
    merged = UnionTaxonomy.newTaxonomy('ott')
    for source in sources:
        merged.absorb(source)
    merged.dump('scratch/compare_Ruggiero/', '\t')
def combine(t, s, bluster):
    """Return a new union taxonomy built from t, then s.

    t is aligned and merged while the union's `blustery` attribute is 0.
    `blustery` is then set to `bluster` for the second source.  Each root
    of s is passed to alignTaxon() on s's alignment before the general
    align/merge.  check() is called on the union before it is returned.
    """
    union = UnionTaxonomy.newTaxonomy('union')

    # First source, with blustery at 0
    # (presumably this keeps diagnostics quiet -- confirm).
    union.blustery = 0
    t_alignment = union.alignment(t)
    union.align(t_alignment)
    union.merge(t_alignment)

    # Second source, with the caller's blustery level.
    union.blustery = bluster
    s_alignment = union.alignment(s)
    for s_root in s.roots():
        s_alignment.alignTaxon(s_root)
    union.align(s_alignment)
    union.merge(s_alignment)

    union.check()
    return union
Beispiel #3
0
def combine(t, s, bluster):
    """Align and merge t, then s, into a fresh union taxonomy.

    The union's `blustery` attribute is 0 while t is processed and is set
    to `bluster` before s is processed.  For s, every root is aligned
    explicitly with alignTaxon() ahead of the align()/merge() pair.  The
    union is check()ed and returned.
    """
    result = UnionTaxonomy.newTaxonomy('union')
    result.blustery = 0

    alignment_t = result.alignment(t)
    result.align(alignment_t)
    result.merge(alignment_t)

    result.blustery = bluster
    alignment_s = result.alignment(s)
    for top in s.roots():
        alignment_s.alignTaxon(top)
    result.align(alignment_s)
    result.merge(alignment_s)

    result.check()
    return result
Beispiel #4
0
def combine(sep, t, s, bluster):
    u = UnionTaxonomy.newTaxonomy('union')
    u.blustery = 0
    u.setSkeleton(sep)

    ta = u.alignment(t)
    u.align(ta)
    u.merge(ta)
    print u.lookup('a')

    u.blustery = bluster
    print s.lookup('a')
    sa = u.alignment(s)

    for root in s.roots():
        sa.alignTaxon(root)

    u.align(sa)
    debug_alignment(sa)
    return u
# Requires python.security.respectJavaAccessibility = false
# on java command line or in .jython

from org.opentreeoflife.taxa import Taxonomy
from org.opentreeoflife.smasher import UnionTaxonomy, HomonymReport 

# Module-level state read by report(): a union taxonomy whose skeleton is
# the taxonomy loaded from tax/skel/.
union = UnionTaxonomy()
skel = Taxonomy.getTaxonomy('tax/skel/', 'skel')
union.setSkeleton(skel)

def report(tax, tag):
    """Write a homonym report for tax.

    Divisions are first marked on tax from the module-level skeleton
    (via the module-level union taxonomy); the report is then written to
    'reports/<tag>-homonym-report.tsv'.
    """
    union.markDivisionsFromSkeleton(tax, skel)
    report_path = 'reports/' + tag + '-homonym-report.tsv'
    HomonymReport.homonymReport(tax, report_path)

# Hard-wired switch: the True branch reports on the assembled OTT only;
# the else branch (currently dead) reports on each source taxonomy.
if True:
    ott = Taxonomy.getTaxonomy('tax/ott/', 'ott')
    report(ott, 'ott')
else:
    import taxonomies
    report(taxonomies.loadSilva(), 'silva')
    report(taxonomies.loadH2007(), 'h2007')
    # NOTE(review): the 'worms' report is fed by loadFung(), the same
    # loader used for the 'if' report on the next line.  This looks like a
    # copy/paste slip -- confirm which loader was intended.
    report(taxonomies.loadFung(), 'worms')
    report(taxonomies.loadFung(), 'if')
    report(taxonomies.loadNcbi(), 'ncbi')
    report(taxonomies.loadGbif(), 'gbif')
    report(taxonomies.loadIrmng(), 'irmng')
Beispiel #6
0
def create_ott(ott_spec):
    """Assemble the 'ott' union taxonomy, assign ids, dump it, return it.

    ott_spec is passed to management.source_path() to obtain the dump
    directory and, after the dump, to record_ott_sources().

    The process exits with status 1 if the amendments clone directory does
    not exist.  The statements below are order-dependent (see the inline
    comments on id assignment and additions).
    """

    # Fail fast
    additions_clone_path = os.path.join(access_head('amendments'),
                                        'amendments-1')
    if not os.path.isdir(additions_clone_path):
        print '# cannot find', additions_clone_path
        sys.exit(1)

    # The id list is opened only to prove that it can be opened (open()
    # raises otherwise); nothing is read from `infile` here.
    with open(os.path.join(access_head('idlist'), 'by_qid.csv'),
              'r') as infile:
        print '# can access idlist'

    ott_path = management.source_path(ott_spec)

    ott = UnionTaxonomy.newTaxonomy('ott')

    # Would be nice if there were tests for all of these...
    # (names_of_interest is not defined in this function; it comes from
    # the enclosing module.)
    for name in names_of_interest:
        ott.eventLogger.namesOfInterest.add(name)

    ott.setSkeleton(Taxonomy.getTaxonomy('curation/separation/', 'separation'))

    # These are particularly hard cases; create alignment targets up front
    adjustments.deal_with_polysemies(ott)

    # Align and merge each source in sequence
    merge_sources(ott)

    # "Old" patch system
    TsvEdits.edit(ott, 'curation/edits/')

    # consider try: ... except: print '**** Exception in patch_ott'
    amendments.patch_ott(ott)

    # End of topology changes.  Now assign ids.
    retain_ids(ott, access_source('ott-PREVIOUS'),
               os.path.join(access_head('idlist'), 'by_qid.csv'))

    # Apply the additions (which already have ids assigned).
    # This has to happen *after* ids are assigned, since additions use OTT
    # ids to identify parents.
    print '-- Processing additions --'
    Addition.processAdditions(additions_clone_path, ott)

    # Mint ids for new nodes
    # NOTE(review): new_taxa_path is not assigned anywhere in this
    # function; it must be supplied by module scope -- confirm.
    print '-- Minting new ids --'
    ott.assignNewIds(new_taxa_path)

    # Remove all trees but the largest (or make them life incertae sedis)
    ott.deforestate()

    # data structure integrity checks
    ott.check()

    # For deprecated id report (dump)
    ott.loadPreferredIds('ids_that_are_otus.tsv', False)
    ott.loadPreferredIds('ids_in_synthesis.tsv', True)

    ott.dump(ott_path)

    record_ott_sources(ott_spec)

    return ott
import sys, os, csv

from org.opentreeoflife.taxa import Taxonomy, SourceTaxonomy, Taxon
from org.opentreeoflife.smasher import UnionTaxonomy

# Union taxonomy named 'dwh'.
dwh = UnionTaxonomy.newTaxonomy('dwh')

# Use this to tell smasher what separation file to use
dwh.setSkeleton(Taxonomy.getTaxonomy('tax/separation/', 'separation'))

# Numbered list of source taxonomies
# (presumably in merge priority order -- TODO confirm):
# 1. trunk
# 2. ictv
# 3. IOC
# 4. ASW
# 5. ODO
# 6. BOM
# 7. ERE
# 8. ONY
# 9. EET
# 10. NCBI
# 11. WOR
# 12. CLP
# 13. COL

# Use this to load the taxonomies.
# NOTE(review): only the first five sources from the list above are loaded
# in this part of the script.

trunk = Taxonomy.getTaxonomy('t/tax/2018_12/trunk/', 'trunk')
ictv = Taxonomy.getTaxonomy('t/tax/2018_12/ictv/', 'ictv')
IOC = Taxonomy.getTaxonomy('t/tax/2018_12/IOC/', 'IOC')
ASW = Taxonomy.getTaxonomy('t/tax/2018_12/ASW/', 'ASW')
ODO = Taxonomy.getTaxonomy('t/tax/2018_12/ODO/', 'ODO')
# Command line argument = file to write to
# Writes a row for every OTT id that
#  (a) occurs in tax/ott/,
#  (b) occurs as an OTU in phylesystem,
#  (c) is sourced only from IRMNG.

import csv, sys

from org.opentreeoflife.taxa import Taxonomy, Rank
from org.opentreeoflife.smasher import UnionTaxonomy

# A scratch union taxonomy is used only as a holder for the two id lists:
# after loading, union.importantIds can be probed by OTT id.  The second
# argument to loadPreferredIds is False for the OTU list and True for the
# synthesis list.
union = UnionTaxonomy.newTaxonomy('ott')
union.loadPreferredIds('ids_that_are_otus.tsv', False)
union.loadPreferredIds('ids_in_synthesis.tsv', True)

ott = Taxonomy.getTaxonomy('tax/ott/', 'ott')
#ott = Taxonomy.getTaxonomy('t/tax/aster/', 'ott')

# NOTE(review): under Python 2 / Jython the csv documentation asks for the
# file to be opened in binary mode ('wb'); the mode is left as 'w' here so
# that the output is unchanged.
with open(sys.argv[1], 'w') as outfile:
    writer = csv.writer(outfile)
    writer.writerow(['irmng','ott','name','synthesis'])
    for taxon in ott.taxa():
        # Keep only taxa whose single source id carries the 'irmng' prefix.
        # if (taxon.rank == Rank.SPECIES_RANK and ...)
        if (len(taxon.sourceIds) == 1 and
            taxon.sourceIds[0].prefix == 'irmng'):
            # lookupId yields None for ids that are in neither list.
            # Compare by identity: None is a singleton.
            probe = union.importantIds.lookupId(taxon.id)
            if probe is not None:
                writer.writerow([taxon.sourceIds[0].id,
                                 taxon.id,
                                 taxon.name,
                                 'synthesis' if probe.inSynthesis else ''])
# Jython script to build the "model village" taxonomy.

from org.opentreeoflife.taxa import Taxonomy
from org.opentreeoflife.smasher import UnionTaxonomy
from claim import Has_child

# Create model taxonomy
# The union taxonomy that the source subsets below are absorbed into.
tax = UnionTaxonomy()

# Establish homonym-resolution skeleton (not really used here)
# skel = Taxonomy.getTaxonomy('tax/skel/', 'skel')
# tax.setSkeleton(skel)


# Add NCBI subset to the model taxonomy
ncbi = Taxonomy.getTaxonomy('t/tax/ncbi_aster/')
# analyzeOTUs sets flags on questionable taxa ("unclassified" and so on)
#  to allow the option of suppression downstream
ncbi.analyzeOTUs()
tax.absorb(ncbi)

# Add GBIF subset to the model taxonomy (absorbed after NCBI)
gbif = Taxonomy.getTaxonomy('t/tax/gbif_aster/')
# analyzeMajorRankConflicts sets the "major_rank_conflict" flag when
# intermediate ranks are missing (e.g. a family that's a child of a
# class)
gbif.analyzeMajorRankConflicts()
tax.absorb(gbif)

# "Old" patch system with tab-delimited files; applied after both
# sources have been absorbed.
tax.edit('t/edits/')
Beispiel #10
0
# Command line argument = file to write to
# Writes a row for every OTT id that
#  (a) occurs in tax/ott/,
#  (b) occurs as an OTU in phylesystem,
#  (c) is sourced only from IRMNG.

import csv, sys

from org.opentreeoflife.taxa import Taxonomy, Rank
from org.opentreeoflife.smasher import UnionTaxonomy

# The union taxonomy serves only to hold the two preferred-id lists, so
# that union.importantIds can be queried by OTT id in the loop below.
# loadPreferredIds is passed False for the OTU list and True for the
# synthesis list.
union = UnionTaxonomy.newTaxonomy('ott')
union.loadPreferredIds('ids_that_are_otus.tsv', False)
union.loadPreferredIds('ids_in_synthesis.tsv', True)

ott = Taxonomy.getTaxonomy('tax/ott/', 'ott')
#ott = Taxonomy.getTaxonomy('t/tax/aster/', 'ott')

# NOTE(review): the Python 2 / Jython csv documentation asks for binary
# mode ('wb') here; 'w' is kept so that the output stays the same.
with open(sys.argv[1], 'w') as outfile:
    writer = csv.writer(outfile)
    writer.writerow(['irmng', 'ott', 'name', 'synthesis'])
    for taxon in ott.taxa():
        # Only taxa with exactly one source id, and that one from IRMNG.
        # if (taxon.rank == Rank.SPECIES_RANK and ...)
        if (len(taxon.sourceIds) == 1
                and taxon.sourceIds[0].prefix == 'irmng'):
            # None means the id is in neither list.  Test by identity,
            # because None is a singleton.
            probe = union.importantIds.lookupId(taxon.id)
            if probe is not None:
                writer.writerow([
                    taxon.sourceIds[0].id, taxon.id, taxon.name,
                    'synthesis' if probe.inSynthesis else ''
                ])
def create_ott():
    """Build the OTT union taxonomy from its sources and return it.

    Sources are prepared and absorbed in a fixed order (SILVA, H2007,
    Fungi from Index Fungorum, Lamiales, Malacostraca from WoRMS, NCBI,
    the rest of WoRMS, the rest of Index Fungorum, GBIF, IRMNG).  Patches
    are then applied, some ids are forced by hand, and remaining ids are
    assigned with the help of the previous OTT version in tax/prev_ott/.

    Several helpers used here (prepare_*, check_invariants, patch_ott,
    names_of_interest, ncbi_assignments_list, ...) are defined outside
    this function.
    """

    ott = UnionTaxonomy.newTaxonomy()

    # There ought to be tests for all of these...

    # NOTE(review): the attribute is spelled `eventlogger` (all lower
    # case) here -- confirm that this matches the Java field name.
    for name in names_of_interest:
        ott.eventlogger.namesOfInterest.add(name)

    # When lumping, prefer to use ids that have been used in OTU matching
    # This list could be used for all sorts of purposes...
    ott.loadPreferredIds('ids-that-are-otus.tsv', False)
    ott.loadPreferredIds('ids-in-synthesis.tsv', True)

    ott.setSkeleton(Taxonomy.getTaxonomy('tax/skel/', 'skel'))

    silva = prepare_silva(ott)
    ott.absorb(silva)
    check_invariants(ott)

    h2007 = prepare_h2007(ott)
    ott.absorb(h2007)

    (fungi, fungorum_sans_fungi) = prepare_fungorum(ott)
    ott.absorb(fungi)
    check_invariants(ott)

    # the non-Fungi from Index Fungorum get absorbed below

    lamiales = prepare_lamiales(ott)
    ott.absorb(lamiales)

    (malacostraca, worms_sans_malacostraca) = prepare_worms(ott)
    ott.absorb(malacostraca)

    # NCBI is aligned to SILVA explicitly before it is absorbed.
    ncbi = prepare_ncbi(ott)
    align_ncbi_to_silva(ncbi, silva, ott)
    ott.absorb(ncbi)
    check_invariants(ott)

    ott.absorb(worms_sans_malacostraca)

    ott.absorb(fungorum_sans_fungi)

    gbif = prepare_gbif(ott)
    ott.absorb(gbif)

    irmng = prepare_irmng(ott)
    ott.absorb(irmng)

    taxonomies.link_to_h2007(ott)

    get_default_extinct_info_from_gbif(gbif, ott)

    check_invariants(ott)
    # consider try: ... except: print '**** Exception in patch_ott'
    patch_ott(ott)

    # Experimental...
    unextinct_ncbi(ncbi, ott)

    # Remove all trees but the largest (or make them life incertae sedis)
    ott.deforestate()

    # -----------------------------------------------------------------------------
    # OTT id assignment

    # Force some id assignments... will try to automate this in the future.
    # Most of these come from looking at the otu-deprecated.tsv file after a
    # series of smasher runs.

    # Each entry is passed as ott.taxon(inf, sup); entries for which that
    # returns None are skipped silently.  (The loop variable `id` shadows
    # the builtin within this function.)
    for (inf, sup, id) in [
            ('Tipuloidea', 'Diptera', '722875'),
            ('Saccharomycetes', 'Saccharomycotina', '989999'),
            ('Phaeosphaeria', 'Ascomycota', '5486272'),
            ('Synedra acus','Eukaryota','992764'),
            ('Epiphloea','Halymeniaceae','5342325'),
            ('Hessea','Archaeplastida','600099'),
            ('Morganella','Arthropoda','6400'),
            ('Rhynchonelloidea','Rhynchonellidae','5316010'),
            ('Epiphloea', 'Lichinales', '5342482'),
            ('Morganella', 'Fungi', '973932'),
            ('Parmeliaceae', 'Lecanorales', '305904'),
    ]:
        tax = ott.taxon(inf, sup)
        if tax != None:
            tax.setId(id)

    # NOTE(review): unlike the loop above, the result is not checked for
    # None before setId is called.
    ott.taxonThatContains('Rhynchonelloidea', 'Sphenarina').setId('795939') # NCBI

    # Forced ids keyed by NCBI id: find the NCBI taxon, map it to its
    # image in ott, and set the id there; both kinds of miss are reported.
    for (ncbi_id, ott_id, name) in ncbi_assignments_list:
        n = ncbi.maybeTaxon(ncbi_id)
        if n != None:
            im = ott.image(n)
            if im != None:
                im.setId(ott_id)
            else:
                print '** NCBI %s not mapped - %s' % (ncbi_id, name)
        else:
            print '** No NCBI taxon %s - %s' % (ncbi_id, name)

    # Cylindrocarpon is now Neonectria
    # NOTE(review): neither the GBIF lookup nor its image is guarded here.
    ott.image(gbif.taxon('2563163')).setId('51754')

    # Force the id for the image of Trichosporon from the fungi source.
    # NOTE(review): the image itself is not checked for None.
    trich = fungi.maybeTaxon('Trichosporon')
    if trich != None:
        ott.image(trich).setId('364222')

    #ott.image(fungi.taxon('11060')).setId('4107132') #Cryptococcus - a total mess


    # Assign OTT ids to taxa that don't have them, re-using old ids when possible
    ids = Taxonomy.getTaxonomy('tax/prev_ott/')

    # Assign old ids to nodes in the new version
    ott.assignIds(ids)

    report_on_h2007(h2007, ott)

    return ott
# Requires python.security.respectJavaAccessibility = false
# on java command line or in .jython

from org.opentreeoflife.taxa import Taxonomy
from org.opentreeoflife.smasher import UnionTaxonomy, HomonymReport

# Module-level state read by report(): a union taxonomy whose skeleton is
# the taxonomy loaded from tax/skel/.
union = UnionTaxonomy()
skel = Taxonomy.getTaxonomy('tax/skel/', 'skel')
union.setSkeleton(skel)


def report(tax, tag):
    """Mark divisions on tax from the skeleton, then write its homonym report.

    The output file is 'reports/' + tag + '-homonym-report.tsv'.  Both
    `union` and `skel` are module-level objects.
    """
    union.markDivisionsFromSkeleton(tax, skel)
    destination = 'reports/' + tag + '-homonym-report.tsv'
    HomonymReport.homonymReport(tax, destination)


# Hard-wired switch: the True branch reports on the assembled OTT only;
# the else branch (currently dead) reports on each source taxonomy.
if True:
    ott = Taxonomy.getTaxonomy('tax/ott/', 'ott')
    report(ott, 'ott')
else:
    import taxonomies
    report(taxonomies.loadSilva(), 'silva')
    report(taxonomies.loadH2007(), 'h2007')
    # NOTE(review): the 'worms' report is fed by loadFung(), the same
    # loader used for the 'if' report on the next line.  This looks like a
    # copy/paste slip -- confirm which loader was intended.
    report(taxonomies.loadFung(), 'worms')
    report(taxonomies.loadFung(), 'if')
    report(taxonomies.loadNcbi(), 'ncbi')
    report(taxonomies.loadGbif(), 'gbif')
    report(taxonomies.loadIrmng(), 'irmng')
def assemble():

    # Create model taxonomy
    tax = UnionTaxonomy.newTaxonomy('ott')

    for name in ['Pentaphragma ellipticum',
                 'Lachnophyllum',
                 'Sipolisia',
                 'Cicerbita bourgaei',
                 'Adenophora triphylla',
                 'Artemisia vulgaris',
                 'Carlina libanotica',
    ]:
        tax.watch(name)

    # Establish homonym-resolution skeleton (not really used here)
    # skel = Taxonomy.getTaxonomy('tax/skel/', 'skel')
    # tax.setSkeleton(skel)


    # Add NCBI subset to the model taxonomy
    ncbi = Taxonomy.getTaxonomy('t/tax/ncbi_aster/', 'ncbi')
    # analyzeOTUs sets flags on questionable taxa ("unclassified" and so on)
    #  to allow the option of suppression downstream
    ncbi.analyzeOTUs()
    align_and_merge(tax.alignment(ncbi))

    # Add GBIF subset fo the model taxonomy
    gbif = Taxonomy.getTaxonomy('t/tax/gbif_aster/', 'gbif')
    gbif.smush()
    # analyzeMajorRankConflicts sets the "major_rank_conflict" flag when
    # intermediate ranks are missing (e.g. a family that's a child of a
    # class)
    gbif.analyzeMajorRankConflicts()
    align_and_merge(tax.alignment(gbif))

    # "Old" patch system with tab-delimited files
    TsvEdits.edit(tax, 't/edits/')

    props = [
        has_parent(taxon('Phellinaceae'), taxon('Asterales'), 'test:1')
    ]

    for prop in props:
        print proclaim(tax, prop)

    gen = tax.newTaxon("Opentreeia", "genus", "data:testing")
    gen.take(tax.newTaxon("Opentreeia sp. C", "species", "data:testing"))
    gen.take(tax.newTaxon("Opentreeia sp. D", "species", "data:testing"))

    # Example of referring to a taxon
    fam = tax.maybeTaxon("Phellinaceae")

    if fam != None:
        # Example of how you might add a genus to the taxonomy
        fam.take(gen)

    # Test deletion feature
    sp = tax.newTaxon("Opentreeia sp. C", "species", "data:testing")
    gen.take(sp)
    sp.prune("aster.py")

    # tax.loadPreferredIds('ids-that-are-otus.tsv')

    additions_repo_path = 't/feed/amendments/amendments-0'
    new_taxa_path = 't/new_taxa'

    # Assign identifiers to the taxa in the model taxonomy.  Identifiers
    # assigned in the previous version are carried over to this version.
    ids = Taxonomy.getTaxonomy('t/tax/prev_aster/', 'ott')
    tax.carryOverIds(ids)    # performs alignment

    Addition.processAdditions(additions_repo_path, tax)

    if False:  # too slow for everyday testing purposes.
        print '-- Checking id list'
        assign_ids_from_list(tax, 'ott_id_list/by_qid.csv')

    tax.assignNewIds(new_taxa_path)

    tax.check()

    # Write the model taxonomy out to a set of files
    tax.dump('t/tax/aster/', '\t|\t')
Beispiel #14
0
def assemble():

    # Create model taxonomy
    tax = UnionTaxonomy.newTaxonomy('ott')

    for name in [
            'Pentaphragma ellipticum',
            'Lachnophyllum',
            'Sipolisia',
            'Cicerbita bourgaei',
            'Adenophora triphylla',
            'Artemisia vulgaris',
            'Carlina libanotica',
    ]:
        tax.watch(name)

    # Establish homonym-resolution skeleton (not really used here)
    # skel = Taxonomy.getTaxonomy('tax/skel/', 'skel')
    # tax.setSkeleton(skel)

    # Add NCBI subset to the model taxonomy
    ncbi = Taxonomy.getTaxonomy('t/tax/ncbi_aster/', 'ncbi')
    # analyzeOTUs sets flags on questionable taxa ("unclassified" and so on)
    #  to allow the option of suppression downstream
    ncbi.analyzeOTUs()
    align_and_merge(tax.alignment(ncbi))

    # Add GBIF subset fo the model taxonomy
    gbif = Taxonomy.getTaxonomy('t/tax/gbif_aster/', 'gbif')
    gbif.smush()
    # analyzeMajorRankConflicts sets the "major_rank_conflict" flag when
    # intermediate ranks are missing (e.g. a family that's a child of a
    # class)
    gbif.analyzeMajorRankConflicts()
    align_and_merge(tax.alignment(gbif))

    # "Old" patch system with tab-delimited files
    TsvEdits.edit(tax, 't/edits/')

    props = [has_parent(taxon('Phellinaceae'), taxon('Asterales'), 'test:1')]

    for prop in props:
        print proclaim(tax, prop)

    gen = tax.newTaxon("Opentreeia", "genus", "data:testing")
    gen.take(tax.newTaxon("Opentreeia sp. C", "species", "data:testing"))
    gen.take(tax.newTaxon("Opentreeia sp. D", "species", "data:testing"))

    # Example of referring to a taxon
    fam = tax.maybeTaxon("Phellinaceae")

    if fam != None:
        # Example of how you might add a genus to the taxonomy
        fam.take(gen)

    # Test deletion feature
    sp = tax.newTaxon("Opentreeia sp. C", "species", "data:testing")
    gen.take(sp)
    sp.prune("aster.py")

    # tax.loadPreferredIds('ids-that-are-otus.tsv')

    additions_repo_path = 't/feed/amendments/amendments-0'
    new_taxa_path = 't/new_taxa'

    # Assign identifiers to the taxa in the model taxonomy.  Identifiers
    # assigned in the previous version are carried over to this version.
    ids = Taxonomy.getTaxonomy('t/tax/prev_aster/', 'ott')
    tax.carryOverIds(ids)  # performs alignment

    Addition.processAdditions(additions_repo_path, tax)

    if False:  # too slow for everyday testing purposes.
        print '-- Checking id list'
        assign_ids_from_list(tax, 'ott_id_list/by_qid.csv')

    tax.assignNewIds(new_taxa_path)

    tax.check()

    # Write the model taxonomy out to a set of files
    tax.dump('t/tax/aster/', '\t|\t')