def doit():
    """Merge the Ruggiero and OTT taxonomies and dump the result.

    Both taxonomies are loaded first, then absorbed (Ruggiero before OTT)
    into a fresh union taxonomy, which is dumped to
    scratch/compare_Ruggiero/ with a tab passed as the second argument.
    """
    sources = (
        Taxonomy.getTaxonomy('scratch/Ruggiero/', 'rug'),
        Taxonomy.getTaxonomy('tax/ott/', 'ott'),
    )
    merged = UnionTaxonomy.newTaxonomy('ott')
    for source in sources:
        merged.absorb(source)
    merged.dump('scratch/compare_Ruggiero/', '\t')
def conflict(spec1, space1, spec2, space2):

    # Reference tree
    ref = Taxonomy.getTaxonomy(spec1, space1)

    # Input tree
    input = Taxonomy.getTaxonomy(spec2, space2)

    a = AlignmentByName(input, ref)
    a.align();

    if False:
        for node in input.taxa():
            print node, a.getTaxon(node)

    print 'Conflict analysis'
    ca = ConflictAnalysis(input, ref, a, False)
    print '  input root:', ca.inputRoot
    print '  ref root:', ca.refRoot
    print '  induced root:', ca.inducedRoot
    print '  ingroup:', ca.ingroup
    print '  induced ingroup:', ca.inducedIngroup
    print '  map size:', ca.map.size()
    print '  comap size:', ca.comap.size()

    mapped_tip_count = 0
    unmapped_tip_count = 0
    none_count = 0

    rel_counts = {}

    if ca.inducedRoot != None:
        for node in ca.ingroup.descendants(True):
            if node.hasChildren():
                art = ca.articulation(node)
                if art != None:
                    n = art.disposition.name
                    print node, n, art.witness
                    rel_counts[n] = rel_counts.get(n, 0) + 1
                else:
                    print node, 'no articulation'
                    none_count += 1
            elif a.getTaxon(node) != None:
                mapped_tip_count += 1
            else:
                unmapped_tip_count += 1
                print node, 'unmapped'
    else:
        print 'no induced root!'

    print
    for n in rel_counts:
        print '%s: %s' % (n, rel_counts[n])
    print 'Mapped tips:', mapped_tip_count
    print 'Unmapped tips:', unmapped_tip_count
    print 'Other:', none_count
def load_tree(path):
    tree = Taxonomy.getTaxonomy(path, 'ott')
    count = 0
    for id in tree.allIds():
        count += 1
    print count, 'ids'
    return tree
def load_fung():
    fung = Taxonomy.getTaxonomy('tax/fung/', 'if')

    fung.analyzeMajorRankConflicts()

    # 2014-04-14 Bad Fungi homonyms in new version of IF.  90156 is the good one.
    # 90154 has no descendants
    if fung.maybeTaxon('90154') != None:
        print 'Removing Fungi 90154'
        fung.taxon('90154').prune(this_source)
    # 90155 is "Nom. inval." and has no descendants
    if fung.maybeTaxon('90155') != None:
        print 'Removing Fungi 90155'
        fung.taxon('90155').prune(this_source)

    fix_basal(fung)

    # smush folds sibling taxa that have the same name.
    # fung.smush()

    if True:
        patch_fung(fung)
    else:
        try:
            patch_fung(fung)
        except:
            print '**** Exception in patch_fung'

    fung.smush()

    return fung
Esempio n. 5
0
def compare(t1, t2):
    print 'comparing', t1, 'to', t2
    retired = 0
    became_hidden = 0
    became_unhidden = 0
    became_extinct = 0
    became_unextinct = 0
    became_suppressed = 0
    became_unsuppressed = 0
    kept = 0
    novel = 0
    tax1 = Taxonomy.getTaxonomy(t1, 'x')
    tax1.inferFlags()
    tax2 = Taxonomy.getTaxonomy(t2, 'x')
    tax2.inferFlags()
    for taxon in tax1.taxa():
        probe = tax2.lookupId(taxon.id)
        if probe == None:
            retired += 1
        elif probe.isAnnotatedHidden() and not taxon.isAnnotatedHidden():
            became_hidden += 1
        elif not probe.isAnnotatedHidden() and taxon.isAnnotatedHidden():
            became_unhidden += 1
        elif probe.isExtinct() and not taxon.isExtinct():
            became_extinct += 1
        elif not probe.isExtinct() and taxon.isExtinct():
            became_unextinct += 1
        elif probe.isHidden() and not taxon.isHidden():
            became_suppressed += 1
        elif not probe.isHidden() and taxon.isHidden():
            became_unsuppressed += 1
        else:
            kept += 1
    for taxon in tax2.taxa():
        if tax1.lookupId(taxon.id) == None:
            novel += 1
    print
    print 'id retired:', retired
    print 'newly hidden:', became_hidden
    print 'no longer hidden:', became_unhidden
    print 'newly extinct:', became_extinct
    print 'no longer extinct:', became_unextinct
    print 'newly otherwise suppressed:', became_suppressed
    print 'no longer otherwise suppressed:', became_unsuppressed
    print 'new:', novel
    print 'no change in status:', kept
def compare(t1, t2):
    print 'comparing', t1, 'to', t2
    retired = 0
    became_hidden = 0
    became_unhidden = 0
    became_extinct = 0
    became_unextinct = 0
    became_suppressed = 0
    became_unsuppressed = 0
    kept = 0
    novel = 0
    tax1 = Taxonomy.getTaxonomy(t1, 'x')
    tax1.inferFlags()
    tax2 = Taxonomy.getTaxonomy(t2, 'x')
    tax2.inferFlags()
    for taxon in tax1.taxa():
        probe = tax2.lookupId(taxon.id)
        if probe == None:
            retired += 1
        elif probe.isAnnotatedHidden() and not taxon.isAnnotatedHidden():
            became_hidden += 1
        elif not probe.isAnnotatedHidden() and taxon.isAnnotatedHidden():
            became_unhidden += 1
        elif probe.isExtinct() and not taxon.isExtinct():
            became_extinct += 1
        elif not probe.isExtinct() and taxon.isExtinct():
            became_unextinct += 1
        elif probe.isHidden() and not taxon.isHidden():
            became_suppressed += 1
        elif not probe.isHidden() and taxon.isHidden():
            became_unsuppressed += 1
        else:
            kept += 1
    for taxon in tax2.taxa():
        if tax1.lookupId(taxon.id) == None:
            novel += 1
    print
    print 'id retired:', retired
    print 'newly hidden:', became_hidden
    print 'no longer hidden:', became_unhidden
    print 'newly extinct:', became_extinct
    print 'no longer extinct:', became_unextinct
    print 'newly otherwise suppressed:', became_suppressed
    print 'no longer otherwise suppressed:', became_unsuppressed
    print 'new:', novel
    print 'no change in status:', kept
def load_silva():
    """Load SILVA from tax/silva/, rename two taxa, patch it and return it."""
    silva = Taxonomy.getTaxonomy('tax/silva/', 'silva')

    # Used in studies pg_2448,pg_2783,pg_2753, seen deprecated on 2015-07-20
    renames = (
        ('AF364847', 'Pantoea ananatis LMG 20103'),  # ncbi:706191
        ('EF690403', 'Pantoea ananatis B1-9'),       # ncbi:1048262
    )
    for accession, new_name in renames:
        silva.taxon(accession).rename(new_name)

    patch_silva(silva)
    return silva
def load_ncbi():
    """Load NCBI from tax/ncbi/, apply fixes and analyses, and return it."""
    ncbi = Taxonomy.getTaxonomy('tax/ncbi/', 'ncbi')
    fix_SAR(ncbi)

    viridiplantae = ncbi.taxon('Viridiplantae')
    viridiplantae.rename('Chloroplastida')
    patch_ncbi(ncbi)

    # analyzeOTUs sets flags on questionable taxa ("unclassified",
    # hybrids, and so on) to allow the option of suppression downstream.
    # analyzeContainers runs after it.
    for analysis in (ncbi.analyzeOTUs, ncbi.analyzeContainers):
        analysis()

    return ncbi
def load_gbif():
    """Load GBIF from tax/gbif/, normalise it, patch it and return it."""
    gbif = Taxonomy.getTaxonomy('tax/gbif/', 'gbif')
    gbif.smush()

    # In GBIF, if a rank is skipped for some children but not others, that
    # means the rank-skipped children are incertae sedis.  Mark them so.
    gbif.analyzeMajorRankConflicts()

    # creates a Eukaryota node
    fix_basal(gbif)

    animalia = gbif.taxon('Animalia')
    animalia.synonym('Metazoa')

    patch_gbif(gbif)
    return gbif
Esempio n. 10
0
def load_worms():
    """Load WoRMS from tax/worms/, apply local adjustments and return it."""
    worms = Taxonomy.getTaxonomy('tax/worms/', 'worms')
    worms.smush()

    biota = worms.taxon('Biota')
    biota.rename('life')
    animalia = worms.taxon('Animalia')
    animalia.synonym('Metazoa')

    fix_basal(worms)

    # 2015-02-17 According to WoRMS web site.  Occurs in pg_1229
    scenedesmus = 'Scenedesmus communis'
    if worms.maybeTaxon(scenedesmus) != None:
        worms.taxon(scenedesmus).synonym('Scenedesmus caudata')

    # See NCBI
    millericrinida = worms.taxon('Millericrinida')
    millericrinida.extant()

    # Help to match up with IRMNG
    ochrophyta = worms.taxon('Ochrophyta')
    ochrophyta.synonym('Heterokontophyta')

    # Second smush: Gracilimesus gorbunovi, pg_1783
    worms.smush()

    return worms
# counts number of taxa with rank=family in a given taxon

from org.opentreeoflife.taxa import Taxonomy, Rank
import argparse

# Command line: one positional argument naming the taxon whose family-rank
# descendants are to be counted.  (The description previously read
# 'load nexsons into postgres', which belongs to a different script.)
parser = argparse.ArgumentParser(
    description='count taxa with rank=family in a given taxon')
parser.add_argument('taxonname',
    help='name of taxon to count'
    )
args = parser.parse_args()

name = args.taxonname
# NOTE(review): machine-specific absolute path; adjust before running
# elsewhere.
ott_path = '/Users/karen/Documents/opentreeoflife/data/ott/ott2.9draft12/'
ott = Taxonomy.getTaxonomy(ott_path, 'ott')
def count_families(taxon):
    count = 0
    with open('families.txt','w') as f:
        for t in taxon.descendants(False):
            if t.rank == Rank.FAMILY_RANK:
                f.write("{n}\n".format(n=t.name))
                count += 1
    f.close()
    return count
print "number families: ",count_families(ott.taxon(name))
Esempio n. 12
0
def merge_sources(ott):
    """Align and merge every source taxonomy into `ott`, in a fixed order.

    The order visible below is: SILVA, Hibbett 2007, the 'Fungi' part of
    Index Fungorum, the Lamiales curation, three clades split out of WoRMS
    (Malacostraca, Cnidaria, Mollusca), NCBI, the remainder of WoRMS, GBIF
    and finally IRMNG.  Reporting calls are interleaved with the merges.

    `ott` is the taxonomy handed to every alignment call; presumably it is
    modified in place by align_and_merge (defined elsewhere).  Nothing is
    returned.
    """

    # Genbank - this is a kludge to make sure it's in the dependencies list.
    # But eventually it ought to be handled in this file, not in the silva
    # import script.
    access_head('genbank')

    # SILVA
    silva = load_taxonomy('silva')
    adjustments.adjust_silva(silva)
    silva_to_ott = adjustments.align_silva(silva, ott)
    align_and_merge(silva_to_ott)

    # Hibbett 2007
    h2007 = Taxonomy.getTaxonomy('curation/h2007/tree.tre', 'h2007')
    adjustments.adjust_h2007(h2007)
    h2007_to_ott = ott.alignment(h2007)
    align_and_merge(h2007_to_ott)

    # Index Fungorum
    fungorum = load_taxonomy('fung')
    adjustments.adjust_fung(fungorum)
    # Only the 'Fungi' part is merged here; fungorum_sans_fungi is used
    # only by the commented-out call further down.
    (fungi, fungorum_sans_fungi) = split_taxonomy(fungorum, 'Fungi')
    align_and_merge(adjustments.align_fungi(fungi, ott))

    # Connect IF families to Hibbett 2007 orders
    adjustments.link_to_h2007(ott)

    # Look for orders that have no children in OTT
    report_on_h2007(h2007, h2007_to_ott, '#')

    # the non-Fungi from Index Fungorum get absorbed below

    lamiales = Taxonomy.getTaxonomy('curation/lamiales/', 'study713')
    adjustments.adjust_lamiales(lamiales)
    align_and_merge(adjustments.align_lamiales(lamiales, ott))

    # WoRMS
    # higher priority to Worms for Malacostraca, Cnidaria, Mollusca
    #  so we split out
    # those clades from worms and absorb them before NCBI
    worms = load_taxonomy('worms')
    adjustments.adjust_worms(worms)
    # Malacostraca instead of Decapoda because M. is in the separation taxonomy
    (malacostraca,
     worms_sans_malacostraca) = split_taxonomy(worms, 'Malacostraca')
    align_and_merge(ott.alignment(malacostraca))
    (cnidaria, worms_sans_cnidaria) = split_taxonomy(worms_sans_malacostraca,
                                                     'Cnidaria')
    align_and_merge(ott.alignment(cnidaria))
    # What is left after the three splits is merged later as
    # low_priority_worms.
    (mollusca, low_priority_worms) = split_taxonomy(worms_sans_cnidaria,
                                                    'Mollusca')
    align_and_merge(ott.alignment(mollusca))

    # NCBI
    ncbi = load_taxonomy('ncbi')
    adjustments.adjust_ncbi(ncbi)

    # analyzeOTUs sets flags on questionable taxa (hybrid, metagenomes,
    #  etc) to allow the option of suppression downstream
    ncbi.analyzeOTUs()

    ncbi_to_ott = adjustments.align_ncbi(ncbi, silva, ott)
    align_and_merge(ncbi_to_ott)

    # Look for orders that have no children in OTT
    # (same report as after Index Fungorum, repeated now that NCBI is merged)
    report_on_h2007(h2007, h2007_to_ott, '#')

    # Reporting
    # Get mapping from NCBI to OTT, derived via SILVA and Genbank.
    mappings = load_ncbi_to_silva(
        os.path.join(management.resource_path('silva'), 'ncbi_to_silva.tsv'),
        ncbi, silva, silva_to_ott)
    compare_ncbi_to_silva(mappings, silva_to_ott)

    # Low-priority WoRMS
    # This is suboptimal, but the names are confusing the division logic
    a = adjustments.align_worms(low_priority_worms, ott)
    align_and_merge(a)

    # The rest of Index Fungorum.  (Maybe not a good idea to use this.
    # These taxa are all in GBIF.)
    # align_and_merge(adjustments.align_fungorum_sans_fungi(fungorum_sans_fungi, ott))

    # GBIF
    gbif = load_taxonomy('gbif')
    adjustments.adjust_gbif(gbif)
    gbif_to_ott = adjustments.align_gbif(gbif, ott)
    align_and_merge(gbif_to_ott)

    # http://dx.doi.org/10.1016/j.ympev.2004.12.019 "Eccrinales
    # (Trichomycetes) are not fungi, but a clade of protists at the
    # early divergence of animals and fungi"
    debug_divisions('Enterobryus cingaloboli', gbif, ott)

    # Cylindrocarpon is now Neonectria
    # The id is set only when the GBIF taxon has an image under the alignment.
    cyl = gbif_to_ott.image(gbif.taxon('Cylindrocarpon', 'Ascomycota'))
    if cyl != None:
        cyl.setId('51754')

    # IRMNG
    # Note the order: the alignment is computed, then hide_irmng runs, and
    # only then is the alignment merged.
    irmng = load_taxonomy('irmng')
    adjustments.adjust_irmng(irmng)
    a = adjustments.align_irmng(irmng, ott)
    hide_irmng(irmng)
    align_and_merge(a)

    # Misc fixups
    report_on_h2007(h2007, h2007_to_ott, '**')

    get_default_extinct_info_from_gbif(
        os.path.join(management.resource_path('gbif'), 'paleo.tsv'), gbif,
        gbif_to_ott)
Esempio n. 13
0
def load_taxonomy(spec):
    """Load and return the taxonomy for the source named by `spec`.

    The location comes from access_head(spec) and the id space from the
    source's "ott_idspace" property.
    """
    location = access_head(spec)
    idspace = management.get_property(spec, "ott_idspace")
    return Taxonomy.getTaxonomy(location, idspace)
Esempio n. 14
0
def load_713():
    """Load and return the taxonomy at tax/713/ in id space 'study713'."""
    return Taxonomy.getTaxonomy('tax/713/', 'study713')
# Command line argument = file to write to
# Writes a row for every OTT id that
#  (a) occurs in tax/ott/,
#  (b) occurs as an OTU in phylesystem,
#  (c) is sourced only from IRMNG.

import csv, sys

from org.opentreeoflife.taxa import Taxonomy, Rank
from org.opentreeoflife.smasher import UnionTaxonomy

# Scratch union taxonomy used only to hold the preferred-id lists, which
# are consulted below through union.importantIds.
union = UnionTaxonomy.newTaxonomy('ott')
union.loadPreferredIds('ids_that_are_otus.tsv', False)
union.loadPreferredIds('ids_in_synthesis.tsv', True)

ott = Taxonomy.getTaxonomy('tax/ott/', 'ott')
# Smaller alternative for testing:
#ott = Taxonomy.getTaxonomy('t/tax/aster/', 'ott')

with open(sys.argv[1], 'w') as outfile:
    writer = csv.writer(outfile)
    writer.writerow(['irmng','ott','name','synthesis'])
    for taxon in ott.taxa():
        # if (taxon.rank == Rank.SPECIES_RANK and ...)
        # Skip anything that is not sourced from IRMNG alone.
        if len(taxon.sourceIds) != 1:
            continue
        if taxon.sourceIds[0].prefix != 'irmng':
            continue
        probe = union.importantIds.lookupId(taxon.id)
        if probe != None:
            row = [taxon.sourceIds[0].id, taxon.id, taxon.name]
            row.append('synthesis' if probe.inSynthesis else '')
            writer.writerow(row)
Esempio n. 16
0
# Jython script to build the "model village" taxonomy.

from org.opentreeoflife.taxa import Taxonomy
from org.opentreeoflife.smasher import UnionTaxonomy
from claim import Has_child

# Create model taxonomy
# Create model taxonomy (an empty union that the sources below are
# absorbed into, NCBI first and GBIF second)
tax = UnionTaxonomy()

# Establish homonym-resolution skeleton (not really used here)
# skel = Taxonomy.getTaxonomy('tax/skel/', 'skel')
# tax.setSkeleton(skel)


# Add NCBI subset to the model taxonomy
ncbi = Taxonomy.getTaxonomy('t/tax/ncbi_aster/')
# analyzeOTUs sets flags on questionable taxa ("unclassified" and so on)
#  to allow the option of suppression downstream
ncbi.analyzeOTUs()
tax.absorb(ncbi)

# Add GBIF subset to the model taxonomy
gbif = Taxonomy.getTaxonomy('t/tax/gbif_aster/')
# analyzeMajorRankConflicts sets the "major_rank_conflict" flag when
# intermediate ranks are missing (e.g. a family that's a child of a
# class)
gbif.analyzeMajorRankConflicts()
tax.absorb(gbif)

# "Old" patch system with tab-delimited files, read from t/edits/
tax.edit('t/edits/')
Esempio n. 17
0
def load_irmng():
    """Load IRMNG from tax/irmng/, apply a list of curated fixes, return it.

    After smush, major-rank conflict analysis and fix_basal, the body is a
    sequence of dated, individually commented patches (prune / absorb /
    extinct / extant / synonym calls on specific taxa).  Several prunes
    pass `this_source`, a name that is not defined in this function and
    must come from the enclosing module.
    """
    irmng = Taxonomy.getTaxonomy('tax/irmng/', 'irmng')
    irmng.smush()
    irmng.analyzeMajorRankConflicts()

    fix_basal(irmng)
    irmng.taxon('Animalia').synonym('Metazoa')

    # JAR 2014-04-26 Flush all 'Unaccepted' taxa
    irmng.taxon('Unaccepted', 'life').prune(this_source)

    # Fixes

    # Neopithecus (extinct) occurs in two places.  Flush one, mark the other
    irmng.taxon('1413316').prune(this_source) #Neopithecus in Mammalia
    irmng.taxon('1413315').extinct() #Neopithecus in Primates (Pongidae)

    # RR #50
    # irmng.taxon('Saxo-Fridericia').rename('Saxofridericia')
    # irmng.taxon('Saxofridericia').absorb(irmng.taxon('Saxo-fridericia'))
    # Done by id instead, and only when 1063899 is present.
    saxo = irmng.maybeTaxon('1063899')
    if saxo != None:
        saxo.absorb(irmng.taxon('1071613'))

    # Romina 2014-04-09
    # IRMNG has EIGHT different Trichodermas.  (Four are synonyms of other things.)
    # 1307461 = Trichoderma Persoon 1794, in Hypocreaceae
    # https://github.com/OpenTreeOfLife/reference-taxonomy/issues/86
    irmng.taxon('Hypocrea').absorb(irmng.taxon('1307461'))

    # JAR 2015-06-28
    # The synonym Ochrothallus multipetalus -> Niemeyera multipetala
    # is no good; it interferes with correct processing of Ochrothallus
    # multipetalus.  We could remove the synonym, but instead remove its
    # target because no synonym-removal command is available.
    irmng.taxon('Niemeyera multipetala').prune(this_source)

    # NOTE(review): the None checks below imply taxon() can return None
    # for a missing taxon -- confirm against the Taxonomy API.
    tip = irmng.taxon('Tipuloidea', 'Hemiptera')  # irmng:1170022
    if tip != None:
        tip.prune("about:blank#this-homonym-is-causing-too-much-trouble")

    oph = irmng.taxon('Ophiurina', 'Ophiurinidae') # irmng:1346026
    if oph != None:
        oph.prune("about:blank#this-homonym-is-causing-too-much-trouble")

    # NCBI synonymizes Pelecypoda = Bivalvia
    irmng.taxon('Bivalvia').absorb(irmng.taxon('Pelecypoda')) # bogus order
    # hmm
    irmng.taxon('Bivalvia').extant()

    # This one was mapping to Blattodea, and making it extinct.
    # Caused me a couple of hours of grief.
    # My guess is it's because its unique child Sinogramma is in Blattodea in GBIF.
    # Wikipedia says it's paraphyletic.
    irmng.taxon('Blattoptera', 'Insecta').prune('https://en.wikipedia.org/wiki/Blattoptera')

    # 2015-07-25 Found while trying to figure out why Theraphosidae was marked extinct.
    # NCBI thinks that Theraphosidae and Aviculariidae are the same.
    irmng.taxon('Aviculariidae').extant()

    # 2015-07-25 Extra Dipteras are confusing new division logic.  Barren genus
    irmng.taxon('1323521').prune(this_source)

    # 2015-09-10 This one is unclassified (Diptera) and is leading to confusion with two other Steinias.
    irmng.taxon('1299622').prune(this_source)

    # 2015-09-11 https://github.com/OpenTreeOfLife/feedback/issues/74
    # Lymnea is a snail, not a shark
    irmng.taxon('1317416').prune(this_source)

    # 2015-10-12 JAR checked IRMNG online and this taxon (Ctenophora in Chelicerata) did not exist
    if irmng.maybeTaxon('1279363') != None:
        irmng.taxon('1279363').prune(this_source)

    return irmng
Esempio n. 18
0
# Command line argument = file to write to
# Writes a row for every OTT id that
#  (a) occurs in tax/ott/,
#  (b) occurs as an OTU in phylesystem,
#  (c) is sourced only from IRMNG.

import csv, sys

from org.opentreeoflife.taxa import Taxonomy, Rank
from org.opentreeoflife.smasher import UnionTaxonomy

# The union taxonomy is used here only as a holder for the two
# preferred-id lists; lookups go through union.importantIds.
union = UnionTaxonomy.newTaxonomy('ott')
union.loadPreferredIds('ids_that_are_otus.tsv', False)
union.loadPreferredIds('ids_in_synthesis.tsv', True)

ott = Taxonomy.getTaxonomy('tax/ott/', 'ott')
#ott = Taxonomy.getTaxonomy('t/tax/aster/', 'ott')

with open(sys.argv[1], 'w') as outfile:
    writer = csv.writer(outfile)
    writer.writerow(['irmng', 'ott', 'name', 'synthesis'])
    for taxon in ott.taxa():
        # if (taxon.rank == Rank.SPECIES_RANK and ...)
        irmng_only = (len(taxon.sourceIds) == 1
                      and taxon.sourceIds[0].prefix == 'irmng')
        if not irmng_only:
            continue
        probe = union.importantIds.lookupId(taxon.id)
        if probe == None:
            # Not among the loaded preferred ids: no row.
            continue
        writer.writerow([
            taxon.sourceIds[0].id,
            taxon.id,
            taxon.name,
            'synthesis' if probe.inSynthesis else '',
        ])
Esempio n. 19
0
def assemble():
    """Build the small 'aster' model taxonomy and dump it to t/tax/aster/.

    Steps, in order: create a union taxonomy, register a few names to
    watch, align and merge the NCBI and GBIF subsets, apply the TSV edits
    and one proclaimed property, add and prune some test taxa, carry ids
    over from the previous version, process additions, assign new ids,
    run check() and dump.  Takes no arguments and returns nothing.
    """

    # Create model taxonomy
    tax = UnionTaxonomy.newTaxonomy('ott')

    for name in [
            'Pentaphragma ellipticum',
            'Lachnophyllum',
            'Sipolisia',
            'Cicerbita bourgaei',
            'Adenophora triphylla',
            'Artemisia vulgaris',
            'Carlina libanotica',
    ]:
        tax.watch(name)

    # Establish homonym-resolution skeleton (not really used here)
    # skel = Taxonomy.getTaxonomy('tax/skel/', 'skel')
    # tax.setSkeleton(skel)

    # Add NCBI subset to the model taxonomy
    ncbi = Taxonomy.getTaxonomy('t/tax/ncbi_aster/', 'ncbi')
    # analyzeOTUs sets flags on questionable taxa ("unclassified" and so on)
    #  to allow the option of suppression downstream
    ncbi.analyzeOTUs()
    align_and_merge(tax.alignment(ncbi))

    # Add GBIF subset to the model taxonomy
    gbif = Taxonomy.getTaxonomy('t/tax/gbif_aster/', 'gbif')
    gbif.smush()
    # analyzeMajorRankConflicts sets the "major_rank_conflict" flag when
    # intermediate ranks are missing (e.g. a family that's a child of a
    # class)
    gbif.analyzeMajorRankConflicts()
    align_and_merge(tax.alignment(gbif))

    # "Old" patch system with tab-delimited files
    TsvEdits.edit(tax, 't/edits/')

    # has_parent, taxon and proclaim are defined outside this function.
    props = [has_parent(taxon('Phellinaceae'), taxon('Asterales'), 'test:1')]

    for prop in props:
        print proclaim(tax, prop)

    gen = tax.newTaxon("Opentreeia", "genus", "data:testing")
    gen.take(tax.newTaxon("Opentreeia sp. C", "species", "data:testing"))
    gen.take(tax.newTaxon("Opentreeia sp. D", "species", "data:testing"))

    # Example of referring to a taxon
    fam = tax.maybeTaxon("Phellinaceae")

    if fam != None:
        # Example of how you might add a genus to the taxonomy
        fam.take(gen)

    # Test deletion feature
    # (a second taxon named "Opentreeia sp. C" is created, attached, then
    # pruned)
    sp = tax.newTaxon("Opentreeia sp. C", "species", "data:testing")
    gen.take(sp)
    sp.prune("aster.py")

    # tax.loadPreferredIds('ids-that-are-otus.tsv')

    additions_repo_path = 't/feed/amendments/amendments-0'
    new_taxa_path = 't/new_taxa'

    # Assign identifiers to the taxa in the model taxonomy.  Identifiers
    # assigned in the previous version are carried over to this version.
    ids = Taxonomy.getTaxonomy('t/tax/prev_aster/', 'ott')
    tax.carryOverIds(ids)  # performs alignment

    Addition.processAdditions(additions_repo_path, tax)

    if False:  # too slow for everyday testing purposes.
        print '-- Checking id list'
        assign_ids_from_list(tax, 'ott_id_list/by_qid.csv')

    tax.assignNewIds(new_taxa_path)

    tax.check()

    # Write the model taxonomy out to a set of files
    tax.dump('t/tax/aster/', '\t|\t')
                        small, big, small_tax.id, small_id)
                    show_interloper(small_node, small_id, ott)

            else:
                print '** More than one taxon named %s is in %s' % (small, big)
                print '  ', small_nodes

    infile.close()


def show_interloper(small_node, small_id, ott):
    if small_node != small_node.taxon():
        print '   %s is a synonym for %s' % (small_node.name,
                                             small_node.taxon().name)
    probe = ott.lookupId(small_id)
    if probe != None:
        print '   Id %s belongs to %s' % (small_id, probe)
    else:
        print '   (There is no taxon with id %s)' % small_id


if __name__ == '__main__':
    if len(sys.argv) == 3:
        inclusions = sys.argv[1]
        taxname = sys.argv[2]
    else:
        print 'ignoring supplied args', sys.argv
        inclusions = 'inclusions.csv'
        taxname = 'tax/ott/'
    check(inclusions, Taxonomy.getTaxonomy(taxname, 'ott'))
Esempio n. 21
0
def assemble():
    """Assemble the 'aster' model taxonomy and write it to t/tax/aster/.

    The function takes no arguments and returns nothing.  It creates a
    union taxonomy, merges the NCBI and GBIF subsets into it, applies
    edits, exercises taxon creation and pruning, carries over and assigns
    ids, checks the result and dumps it.
    """

    # Create model taxonomy
    tax = UnionTaxonomy.newTaxonomy('ott')

    # Names registered with tax.watch
    for name in ['Pentaphragma ellipticum',
                 'Lachnophyllum',
                 'Sipolisia',
                 'Cicerbita bourgaei',
                 'Adenophora triphylla',
                 'Artemisia vulgaris',
                 'Carlina libanotica',
    ]:
        tax.watch(name)

    # Establish homonym-resolution skeleton (not really used here)
    # skel = Taxonomy.getTaxonomy('tax/skel/', 'skel')
    # tax.setSkeleton(skel)


    # Add NCBI subset to the model taxonomy
    ncbi = Taxonomy.getTaxonomy('t/tax/ncbi_aster/', 'ncbi')
    # analyzeOTUs sets flags on questionable taxa ("unclassified" and so on)
    #  to allow the option of suppression downstream
    ncbi.analyzeOTUs()
    align_and_merge(tax.alignment(ncbi))

    # Add GBIF subset to the model taxonomy
    gbif = Taxonomy.getTaxonomy('t/tax/gbif_aster/', 'gbif')
    gbif.smush()
    # analyzeMajorRankConflicts sets the "major_rank_conflict" flag when
    # intermediate ranks are missing (e.g. a family that's a child of a
    # class)
    gbif.analyzeMajorRankConflicts()
    align_and_merge(tax.alignment(gbif))

    # "Old" patch system with tab-delimited files
    TsvEdits.edit(tax, 't/edits/')

    # has_parent, taxon and proclaim come from outside this function.
    props = [
        has_parent(taxon('Phellinaceae'), taxon('Asterales'), 'test:1')
    ]

    for prop in props:
        print proclaim(tax, prop)

    gen = tax.newTaxon("Opentreeia", "genus", "data:testing")
    gen.take(tax.newTaxon("Opentreeia sp. C", "species", "data:testing"))
    gen.take(tax.newTaxon("Opentreeia sp. D", "species", "data:testing"))

    # Example of referring to a taxon
    fam = tax.maybeTaxon("Phellinaceae")

    if fam != None:
        # Example of how you might add a genus to the taxonomy
        fam.take(gen)

    # Test deletion feature: create another "Opentreeia sp. C", attach it
    # to the genus, then prune it.
    sp = tax.newTaxon("Opentreeia sp. C", "species", "data:testing")
    gen.take(sp)
    sp.prune("aster.py")

    # tax.loadPreferredIds('ids-that-are-otus.tsv')

    additions_repo_path = 't/feed/amendments/amendments-0'
    new_taxa_path = 't/new_taxa'

    # Assign identifiers to the taxa in the model taxonomy.  Identifiers
    # assigned in the previous version are carried over to this version.
    ids = Taxonomy.getTaxonomy('t/tax/prev_aster/', 'ott')
    tax.carryOverIds(ids)    # performs alignment

    Addition.processAdditions(additions_repo_path, tax)

    if False:  # too slow for everyday testing purposes.
        print '-- Checking id list'
        assign_ids_from_list(tax, 'ott_id_list/by_qid.csv')

    tax.assignNewIds(new_taxa_path)

    tax.check()

    # Write the model taxonomy out to a set of files
    tax.dump('t/tax/aster/', '\t|\t')
import sys

from org.opentreeoflife.taxa import Taxonomy
from org.opentreeoflife.smasher import AlignmentByName
from org.opentreeoflife.conflict import ConflictAnalysis

# Load the Ruggiero taxonomy and save its Newick form, followed by a
# newline, to scratch/Ruggiero.tre.
rug = Taxonomy.getTaxonomy('scratch/Ruggiero/', 'rug')

with open('scratch/Ruggiero.tre', 'w') as outfile:
    outfile.write(rug.toNewick(False) + '\n')
# Requires python.security.respectJavaAccessibility = false
# on java command line or in .jython

from org.opentreeoflife.taxa import Taxonomy
from org.opentreeoflife.smasher import UnionTaxonomy, HomonymReport

# Union taxonomy that carries the skeleton; report() passes both to
# markDivisionsFromSkeleton before producing each homonym report.
union = UnionTaxonomy()
skel = Taxonomy.getTaxonomy('tax/skel/', 'skel')
union.setSkeleton(skel)


def report(tax, tag):
    """Write a homonym report for `tax` to reports/<tag>-homonym-report.tsv.

    union.markDivisionsFromSkeleton(tax, skel) is called first, using the
    module-level `union` and `skel`.
    """
    union.markDivisionsFromSkeleton(tax, skel)
    HomonymReport.homonymReport(tax, 'reports/' + tag + '-homonym-report.tsv')


# Development toggle: report on OTT only.  Switch to False to report on
# each source taxonomy instead.
if True:
    ott = Taxonomy.getTaxonomy('tax/ott/', 'ott')
    report(ott, 'ott')
else:
    import taxonomies
    report(taxonomies.loadSilva(), 'silva')
    report(taxonomies.loadH2007(), 'h2007')
    # Previously this line loaded Index Fungorum (loadFung) under the
    # 'worms' tag, so the worms report duplicated the 'if' report.
    # NOTE(review): assumes taxonomies.loadWorms exists -- confirm.
    report(taxonomies.loadWorms(), 'worms')
    report(taxonomies.loadFung(), 'if')
    report(taxonomies.loadNcbi(), 'ncbi')
    report(taxonomies.loadGbif(), 'gbif')
    report(taxonomies.loadIrmng(), 'irmng')
Esempio n. 24
0
import sys, os, csv

from org.opentreeoflife.taxa import Taxonomy, SourceTaxonomy, Taxon
from org.opentreeoflife.smasher import UnionTaxonomy

# Union taxonomy for the dynamic working hierarchy.
dwh = UnionTaxonomy.newTaxonomy('dwh')

# Tell smasher which separation taxonomy to use as the skeleton.
separation = Taxonomy.getTaxonomy('tax/separation/', 'separation')
dwh.setSkeleton(separation)

# Source taxonomies, numbered as in the original listing:
#    1 trunk    2 ictv    3 IOC    4 ASW    5 ODO
#    6 BOM      7 ERE     8 ONY    9 EET   10 NCBI
#   11 WOR     12 CLP    13 COL

# Load the taxonomies.
trunk = Taxonomy.getTaxonomy(
    't/tax/2018_12/dynamichierarchytrunk2018-11-21/',
    'trunk')
ictv = Taxonomy.getTaxonomy(
    't/tax/2018_12/ICTV-virus_taxonomy-with-higherClassification/',
    'ictv')
IOC = Taxonomy.getTaxonomy(
    't/tax/2018_12/ioc-birdlist/',
    'IOC')
        small = row[0]
        big = row[1]
        small_id = row[2]

        small_tax = ott.maybeTaxon(small_id)
        if small_tax == None:
            small_tax = ott.maybeTaxon(small)
            if small_tax == None:
                print '** No unique taxon with id %s or name %s' % (small_id, small)
            else:
                print '** %s is %s, not %s' % (small, small_tax.id, small_id)
        else:
            look = ott.maybeTaxon(small, big)
            if look == None:
                print '** %s=%s not under %s' % (small, small_id, big)
                small_tax.show()
            elif look != small_tax:
                print '** The %s that descends from %s is %s, not %s' % (small, big, look.id, small_id)
            if small_tax.isHidden():
                print '%s (%s) is hidden' % (small, small_id)

    infile.close()

if __name__ == '__main__':
    taxname = 'tax/ott/'
    if len(sys.argv) > 1:
        taxname = sys.argv[1]
    else:
        print sys.argv
    check(Taxonomy.getTaxonomy(taxname))
import sys, os, csv

from org.opentreeoflife.taxa import Taxonomy, SourceTaxonomy, Taxon
from org.opentreeoflife.smasher import UnionTaxonomy

# The union taxonomy being assembled.
dwh = UnionTaxonomy.newTaxonomy('dwh')

# Use this to tell smasher what separation file to use
dwh.setSkeleton(
    Taxonomy.getTaxonomy('tax/separation/', 'separation')
)

# Sources in listed order:
#   1. trunk   2. ictv   3. IOC   4. ASW   5. ODO   6. BOM   7. ERE
#   8. ONY     9. EET   10. NCBI 11. WOR  12. CLP  13. COL

# Load the taxonomies; each lives in its own directory under
# t/tax/2018_12/ and uses its own name as the id space.
trunk = Taxonomy.getTaxonomy('t/tax/2018_12/trunk/',
                             'trunk')
ictv = Taxonomy.getTaxonomy('t/tax/2018_12/ictv/',
                            'ictv')
IOC = Taxonomy.getTaxonomy('t/tax/2018_12/IOC/',
                           'IOC')
ASW = Taxonomy.getTaxonomy('t/tax/2018_12/ASW/',
                           'ASW')
ODO = Taxonomy.getTaxonomy('t/tax/2018_12/ODO/',
                           'ODO')
import sys

from org.opentreeoflife.taxa import Taxonomy
from org.opentreeoflife.smasher import AlignmentByName
from org.opentreeoflife.conflict import ConflictAnalysis

# Convert the Ruggiero taxonomy to Newick and store it, newline-terminated,
# in scratch/Ruggiero.tre.
rug = Taxonomy.getTaxonomy('scratch/Ruggiero/', 'rug')

with open('scratch/Ruggiero.tre', 'w') as outfile:
    for piece in (rug.toNewick(False), '\n'):
        outfile.write(piece)

Esempio n. 28
0
# One-off script prepared to provide data to David Hibbett and Romina Gazis.
# Lists numbers of species in each fungal order.

from org.opentreeoflife.taxa import Taxonomy
import csv, sys
from taxonomies import load_fung, load_ncbi, load_gbif, load_irmng

# One (short name, loaded taxonomy, column label) triple per taxonomy.
# The loaders run right here, in the order listed.
taxonomies = [
    ('fung', load_fung(), 'Index Fungorum'),
    ('ncbi', load_ncbi(), 'NCBI'),
    ('gbif', load_gbif(), 'GBIF'),
    ('irmng', load_irmng(), 'IRMNG'),
    ('ott', Taxonomy.getTaxonomy('tax/ott/'), 'OTT 2.9'),
]

def main():
    """Read the list of orders and write the counts for them.

    The first column of each data row of 'order-counts-orders.csv' (the
    first row is treated as a header and skipped) is appended to a list
    that starts with 'Fungi'; the list is then passed to write_counts.

    The file is opened in a with-statement so it is closed even if reading
    fails.  Blank rows, which csv.reader yields as empty lists, are
    skipped, and an empty file yields just ['Fungi'].
    """
    taxa = ['Fungi']
    with open('order-counts-orders.csv', 'r') as infile:
        reader = csv.reader(infile)
        next(reader, None)   # header row (tolerates an empty file)
        taxa.extend(row[0] for row in reader if row)

    write_counts(taxa)

def write_counts(taxa):
    outfile = open('order-counts.csv', 'w')
    writer = csv.writer(outfile)
    header = ['order']
    for (name, taxonomy, label) in taxonomies:
        header += [label + ' bin', label + ' sp', label + ' tip']
                small_node = small_nodes[0]
                small_tax = small_node.taxon()
                if small_id != '' and small_tax != small_id_tax:
                    print '** The id of %s in %s is %s (expected %s)' % (small, big, small_tax.id, small_id)
                    show_interloper(small_node, small_id, ott)

            else:
                print '** More than one taxon named %s is in %s' % (small, big)
                print '  ', small_nodes

    infile.close()

def show_interloper(small_node, small_id, ott):
    if small_node != small_node.taxon():
        print '   %s is a synonym for %s' % (small_node.name, small_node.taxon().name)
    probe = ott.lookupId(small_id)
    if probe != None:
        print '   Id %s belongs to %s' % (small_id, probe)
    else:
        print '   (There is no taxon with id %s)' % small_id

if __name__ == '__main__':
    if len(sys.argv) == 3:
        inclusions = sys.argv[1]
        taxname = sys.argv[2]
    else:
        print 'ignoring supplied args', sys.argv
        inclusions = 'inclusions.csv'
        taxname = 'tax/ott/'
    check(inclusions, Taxonomy.getTaxonomy(taxname, 'ott'))
Esempio n. 30
0
def create_ott(ott_spec):
    """Build the 'ott' union taxonomy, dump it, and return it.

    ott_spec is passed to management.source_path() to obtain the directory
    the finished taxonomy is dumped to, and to record_ott_sources() once the
    dump is done.

    The steps run in a fixed order: skeleton and polysemy setup, merging of
    the sources, curated edits and amendments, id retention from the previous
    OTT, additions, minting of new ids, then cleanup, checks and the dump.

    Exits the process with status 1 if the additions clone directory does
    not exist.  Relies on names that are not defined in this function
    (names_of_interest, new_taxa_path, access_head, access_source, ...);
    presumably these are module-level -- verify against the rest of the file.
    """

    # Fail fast: both inputs checked here are only consumed late in the run
    # (additions processing and id retention), so verify up front that they
    # are reachable.
    additions_clone_path = os.path.join(access_head('amendments'),
                                        'amendments-1')
    if not os.path.isdir(additions_clone_path):
        print '# cannot find', additions_clone_path
        sys.exit(1)

    # Opening the id list is only an accessibility probe; nothing is read
    # from infile here.
    with open(os.path.join(access_head('idlist'), 'by_qid.csv'),
              'r') as infile:
        print '# can access idlist'

    ott_path = management.source_path(ott_spec)

    ott = UnionTaxonomy.newTaxonomy('ott')

    # Would be nice if there were tests for all of these...
    for name in names_of_interest:
        ott.eventLogger.namesOfInterest.add(name)

    ott.setSkeleton(Taxonomy.getTaxonomy('curation/separation/', 'separation'))

    # These are particularly hard cases; create alignment targets up front
    adjustments.deal_with_polysemies(ott)

    # Align and merge each source in sequence
    merge_sources(ott)

    # "Old" patch system
    TsvEdits.edit(ott, 'curation/edits/')

    # consider try: ... except: print '**** Exception in patch_ott'
    amendments.patch_ott(ott)

    # End of topology changes.  Now assign ids.
    retain_ids(ott, access_source('ott-PREVIOUS'),
               os.path.join(access_head('idlist'), 'by_qid.csv'))

    # Apply the additions (which already have ids assigned).
    # This has to happen *after* ids are assigned, since additions use OTT
    # ids to identify parents.
    print '-- Processing additions --'
    Addition.processAdditions(additions_clone_path, ott)

    # Mint ids for new nodes
    print '-- Minting new ids --'
    ott.assignNewIds(new_taxa_path)

    # Remove all trees but the largest (or make them life incertae sedis)
    ott.deforestate()

    # data structure integrity checks
    ott.check()

    # For deprecated id report (dump)
    ott.loadPreferredIds('ids_that_are_otus.tsv', False)
    ott.loadPreferredIds('ids_in_synthesis.tsv', True)

    ott.dump(ott_path)

    record_ott_sources(ott_spec)

    return ott
def create_ott():
    """Build the OTT union taxonomy from its sources and return it.

    Sources are absorbed in a fixed order: SILVA, h2007, the Fungi part of
    Index Fungorum, Lamiales, the Malacostraca part of WoRMS, NCBI, the rest
    of WoRMS, the rest of Index Fungorum, GBIF, and IRMNG.  After patching
    and deforestation, a number of OTT ids are forced by hand, and the
    remaining ids are assigned with the taxonomy in 'tax/prev_ott/' as the
    id source.

    Nothing is written out here apart from whatever the called helpers do;
    the caller receives the finished UnionTaxonomy.
    """

    ott = UnionTaxonomy.newTaxonomy()

    # There ought to be tests for all of these...

    # NOTE(review): confirm the attribute is spelled `eventlogger` (all
    # lower case) in the UnionTaxonomy version this runs against.
    for name in names_of_interest:
        ott.eventlogger.namesOfInterest.add(name)

    # When lumping, prefer to use ids that have been used in OTU matching
    # This list could be used for all sorts of purposes...
    ott.loadPreferredIds('ids-that-are-otus.tsv', False)
    ott.loadPreferredIds('ids-in-synthesis.tsv', True)

    ott.setSkeleton(Taxonomy.getTaxonomy('tax/skel/', 'skel'))

    silva = prepare_silva(ott)
    ott.absorb(silva)
    check_invariants(ott)

    h2007 = prepare_h2007(ott)
    ott.absorb(h2007)

    (fungi, fungorum_sans_fungi) = prepare_fungorum(ott)
    ott.absorb(fungi)
    check_invariants(ott)

    # the non-Fungi from Index Fungorum get absorbed below

    lamiales = prepare_lamiales(ott)
    ott.absorb(lamiales)

    (malacostraca, worms_sans_malacostraca) = prepare_worms(ott)
    ott.absorb(malacostraca)

    # NCBI is explicitly aligned to SILVA before it is absorbed.
    ncbi = prepare_ncbi(ott)
    align_ncbi_to_silva(ncbi, silva, ott)
    ott.absorb(ncbi)
    check_invariants(ott)

    # The remainders of the two split sources go in only after NCBI.
    ott.absorb(worms_sans_malacostraca)

    ott.absorb(fungorum_sans_fungi)

    gbif = prepare_gbif(ott)
    ott.absorb(gbif)

    irmng = prepare_irmng(ott)
    ott.absorb(irmng)

    taxonomies.link_to_h2007(ott)

    get_default_extinct_info_from_gbif(gbif, ott)

    check_invariants(ott)
    # consider try: ... except: print '**** Exception in patch_ott'
    patch_ott(ott)

    # Experimental...
    unextinct_ncbi(ncbi, ott)

    # Remove all trees but the largest (or make them life incertae sedis)
    ott.deforestate()

    # -----------------------------------------------------------------------------
    # OTT id assignment

    # Force some id assignments... will try to automate this in the future.
    # Most of these come from looking at the otu-deprecated.tsv file after a
    # series of smasher runs.

    # Each triple is (taxon name, name passed as second argument to
    # ott.taxon() -- presumably an ancestor used to disambiguate homonyms,
    # id to force).  Entries whose taxon is not found are silently skipped.
    # NOTE(review): the loop variable `id` shadows the builtin of that name.
    for (inf, sup, id) in [
            ('Tipuloidea', 'Diptera', '722875'),
            ('Saccharomycetes', 'Saccharomycotina', '989999'),
            ('Phaeosphaeria', 'Ascomycota', '5486272'),
            ('Synedra acus','Eukaryota','992764'),
            ('Epiphloea','Halymeniaceae','5342325'),
            ('Hessea','Archaeplastida','600099'),
            ('Morganella','Arthropoda','6400'),
            ('Rhynchonelloidea','Rhynchonellidae','5316010'),
            ('Epiphloea', 'Lichinales', '5342482'),
            ('Morganella', 'Fungi', '973932'),
            ('Parmeliaceae', 'Lecanorales', '305904'),
    ]:
        tax = ott.taxon(inf, sup)
        if tax != None:
            tax.setId(id)

    # NOTE(review): unlike the loop above there is no None check on the
    # lookup result here -- confirm the taxon is guaranteed to exist.
    ott.taxonThatContains('Rhynchonelloidea', 'Sphenarina').setId('795939') # NCBI

    # Force ids for taxa identified by their NCBI id: find the NCBI taxon,
    # then its image in ott, and report (rather than fail) when either step
    # comes up empty.
    for (ncbi_id, ott_id, name) in ncbi_assignments_list:
        n = ncbi.maybeTaxon(ncbi_id)
        if n != None:
            im = ott.image(n)
            if im != None:
                im.setId(ott_id)
            else:
                print '** NCBI %s not mapped - %s' % (ncbi_id, name)
        else:
            print '** No NCBI taxon %s - %s' % (ncbi_id, name)

    # Cylindrocarpon is now Neonectria
    # NOTE(review): no None check on gbif.taxon(...) or ott.image(...) here.
    ott.image(gbif.taxon('2563163')).setId('51754')

    # Force the id of the image of Trichosporon, if the Fungi source has it
    trich = fungi.maybeTaxon('Trichosporon')
    if trich != None:
        ott.image(trich).setId('364222')

    #ott.image(fungi.taxon('11060')).setId('4107132') #Cryptococcus - a total mess


    # Assign OTT ids to taxa that don't have them, re-using old ids when possible
    ids = Taxonomy.getTaxonomy('tax/prev_ott/')

    # Assign old ids to nodes in the new version
    ott.assignIds(ids)

    report_on_h2007(h2007, ott)

    return ott
# Requires python.security.respectJavaAccessibility = false
# on java command line or in .jython

from org.opentreeoflife.taxa import Taxonomy
from org.opentreeoflife.smasher import UnionTaxonomy, HomonymReport 

# Module-level state shared by report(): a union taxonomy that has the
# skeleton taxonomy from 'tax/skel/' installed as its skeleton.
union = UnionTaxonomy()
skel = Taxonomy.getTaxonomy('tax/skel/', 'skel')
union.setSkeleton(skel)

def report(tax, tag):
    """Mark divisions on `tax` from the skeleton, then write its homonym report.

    The report is written to 'reports/<tag>-homonym-report.tsv'.
    """
    union.markDivisionsFromSkeleton(tax, skel)
    destination = 'reports/' + tag + '-homonym-report.tsv'
    HomonymReport.homonymReport(tax, destination)

# Hand-edited switch: as written, only the built OTT taxonomy is reported on.
# Flipping the condition reports on each source taxonomy instead.
if True:
    ott = Taxonomy.getTaxonomy('tax/ott/', 'ott')
    report(ott, 'ott')
else:
    import taxonomies
    report(taxonomies.loadSilva(), 'silva')
    report(taxonomies.loadH2007(), 'h2007')
    # NOTE(review): the 'worms' report is loaded with loadFung(), the same
    # loader as the 'if' report below -- looks like a copy-paste slip;
    # confirm which loader was intended.
    report(taxonomies.loadFung(), 'worms')
    report(taxonomies.loadFung(), 'if')
    report(taxonomies.loadNcbi(), 'ncbi')
    report(taxonomies.loadGbif(), 'gbif')
    report(taxonomies.loadIrmng(), 'irmng')
# counts number of taxa with rank=family in a given taxon

from org.opentreeoflife.taxa import Taxonomy, Rank
import argparse

# Command-line interface: the taxon to count within, plus an optional
# override for where the OTT taxonomy is loaded from.  The default location
# is the path that used to be hard-coded, so existing invocations behave as
# before.
parser = argparse.ArgumentParser(
    description='count taxa with rank=family in a given taxon')
parser.add_argument('taxonname', help='name of taxon to count')
parser.add_argument(
    '--ott-path',
    default='/Users/karen/Documents/opentreeoflife/data/ott/ott2.9draft12/',
    help='directory of the OTT taxonomy to load')
args = parser.parse_args()

name = args.taxonname
ott_path = args.ott_path
ott = Taxonomy.getTaxonomy(ott_path, 'ott')


def count_families(taxon, out_path='families.txt'):
    """Count the family-rank nodes among `taxon`'s descendants.

    The name of each family found is also written, one per line, to
    `out_path`; any existing file at that path is overwritten.

    taxon    -- object whose descendants(False) yields nodes with `rank` and
                `name` attributes (presumably False excludes the taxon
                itself -- confirm against the Taxonomy API).
    out_path -- file to write the family names to; defaults to
                'families.txt' in the current directory.

    Returns the number of nodes whose rank equals Rank.FAMILY_RANK.
    """
    count = 0
    # The with-block closes the file on exit, so no explicit close() is
    # needed afterwards.
    with open(out_path, 'w') as f:
        for t in taxon.descendants(False):
            if t.rank == Rank.FAMILY_RANK:
                f.write("{n}\n".format(n=t.name))
                count += 1
    return count


# Look up the taxon named on the command line and print how many family-rank
# descendants it has (count_families also writes their names to a file).
# NOTE(review): if ott.taxon(name) can return None for an unknown name,
# count_families will fail on it -- confirm and guard if so.
print "number families: ", count_families(ott.taxon(name))