# Example #1
# 0
# One-off script prepared to provide data to David Hibbett and Romina Gazis.
# Lists numbers of species in each fungal order.

from org.opentreeoflife.taxa import Taxonomy
import csv, sys
from taxonomies import load_fung, load_ncbi, load_gbif, load_irmng

# Taxonomies to report on.  Each entry is a 3-tuple of
# (short name, taxonomy object, label); the label is the prefix that
# write_counts() uses for this taxonomy's column headings.
# Note that the load_*() calls and Taxonomy.getTaxonomy() are evaluated
# when this module is loaded, not when main() runs.
taxonomies = [('fung', load_fung(), 'Index Fungorum'),
              ('ncbi', load_ncbi(), 'NCBI'),
              ('gbif', load_gbif(), 'GBIF'),
              ('irmng', load_irmng(), 'IRMNG'),
              ('ott', Taxonomy.getTaxonomy('tax/ott/'), 'OTT 2.9'),
          ]

def main():
    """Read the taxon names from 'order-counts-orders.csv' and report on them.

    The first row of the CSV file is treated as a header and skipped.  The
    first column of every remaining non-empty row is taken as a taxon name.
    The resulting list always starts with 'Fungi' and is handed to
    write_counts().
    """
    taxa = ['Fungi']
    # 'with' guarantees the input file is closed even if reading fails.
    with open('order-counts-orders.csv', 'r') as infile:
        reader = csv.reader(infile)
        # Skip the header row.  The builtin next() works on both Python 2.6+
        # and Python 3; the default avoids StopIteration on an empty file.
        next(reader, None)
        for row in reader:
            # csv.reader yields [] for blank lines; skip those instead of
            # failing with IndexError on row[0].
            if row:
                taxa.append(row[0])

    write_counts(taxa)

def write_counts(taxa):
    """Begin the 'order-counts.csv' report: open the file and build the header.

    The header list starts with 'order' and then gets three columns for each
    entry of the module-level `taxonomies` list: '<label> bin',
    '<label> sp' and '<label> tip'.

    NOTE(review): as shown here the function stops once the header list has
    been built -- `taxa` and `writer` are never used, nothing is written to
    the file and `outfile` is never closed.  The definition looks truncated;
    confirm against the complete script before relying on it.
    """
    # Opening with 'w' truncates any existing 'order-counts.csv'.
    outfile = open('order-counts.csv', 'w')
    writer = csv.writer(outfile)
    header = ['order']
    # Only the label of each entry is used; name and taxonomy are unpacked
    # but not referenced in the visible code.
    for (name, taxonomy, label) in taxonomies:
        header += [label + ' bin', label + ' sp', label + ' tip']
def prepare_ncbi(ott):
    """Load the NCBI taxonomy, register it with `ott`, and align it to OTT.

    Steps, in order:
      1. Load NCBI with taxonomies.load_ncbi() and register it via
         ott.addSource().
      2. Hide the descendants of NCBI 'Fungi' (to rank 'species').
      3. Issue a series of ott.same() / ott.notSame() directives for
         individual taxa (see the dated notes beside each one).

    ott -- the taxonomy being assembled; this function calls its addSource,
           same, notSame, taxon and taxonThatContains methods.

    Returns the loaded NCBI taxonomy object.

    NOTE(review): `taxonomies` is used here as an object exposing
    load_ncbi() (presumably an imported module).  Confirm that it is not
    shadowed by a module-level list that is also named `taxonomies`.
    """

    ncbi = taxonomies.load_ncbi()
    ott.addSource(ncbi)

    # David Hibbett has requested that for Fungi, only Index Fungorum
    # should be seen.  Rather than delete the NCBI fungal taxa, we just
    # mark them 'hidden' so they can be suppressed downstream.  This
    # preserves the identifier assignments, which may have been used
    # somewhere.
    ncbi.taxon('Fungi').hideDescendantsToRank('species')

    # - Alignment to OTT -

    #ott.same(ncbi.taxon('Cyanobacteria'), silva.taxon('D88288/#3'))
    # #### Check - was fungi.taxon
    # ** No unique taxon found with this name: Burkea
    # ** No unique taxon found with this name: Coscinium
    # ** No unique taxon found with this name: Perezia
    # ott.notSame(ncbi.taxon('Burkea', 'Viridiplantae'), ott.taxon('Burkea'))
    # ott.notSame(ncbi.taxon('Coscinium', 'Viridiplantae'), ott.taxon('Coscinium'))
    # ott.notSame(ncbi.taxon('Perezia', 'Viridiplantae'), ott.taxon('Perezia'))

    # JAR 2014-04-11 Discovered during regression testing
    # now handled in other ways
    # ott.notSame(ncbi.taxon('Epiphloea', 'Rhodophyta'), ott.taxon('Epiphloea', 'Ascomycota'))

    # JAR attempt to resolve ambiguous alignment of Trichosporon in IF and
    # NCBI based on common parent and member.
    # Type = T. beigelii, which is current, according to Mycobank.
    # But I'm going to use a different 'type', Trichosporon cutaneum.
    # #### Check - was fungi.taxon
    # The NCBI side is looked up by the string '5552' rather than by name
    # (the name-based lookup is kept below, commented out).
    ott.same(ott.taxonThatContains('Trichosporon', 'Trichosporon cutaneum'),
             #ncbi.taxonThatContains('Trichosporon', 'Trichosporon cutaneum')
             ncbi.taxon('5552')
             )

    # 2014-04-23 In new version of IF - obvious misalignment
    # #### Check - was fungi.taxon
    # ott.notSame(ncbi.taxon('Crepidula', 'Gastropoda'), ott.taxon('Crepidula', 'Microsporidia'))
    # ott.notSame(ncbi.taxon('Hessea', 'Viridiplantae'), ott.taxon('Hessea', 'Microsporidia'))
    # 2014-04-23 Resolve ambiguity introduced into new version of IF
    # http://www.speciesfungorum.org/Names/SynSpecies.asp?RecordID=331593
    # #### Check - was fungi.taxon
    ott.same(ncbi.taxon('Gymnopilus spectabilis var. junonius'), ott.taxon('Gymnopilus junonius'))

    # JAR 2014-04-23 More sample contamination in SILVA 115
    # #### Check - was fungi.taxon
    # ott.same(ncbi.taxon('Lamprospora'), ott.taxon('Lamprospora', 'Pyronemataceae'))

    # JAR 2014-04-25
    # ### CHECK: was silva.taxon
    # ott.notSame(ott.taxon('Bostrychia', 'Rhodophyceae'), ncbi.taxon('Bostrychia', 'Aves'))

    # https://github.com/OpenTreeOfLife/feedback/issues/45
    # Disabled: the 'if False' guard means this directive never executes.
    if False:
        ott.notSame(ott.maybeTaxon('Choanoflagellida', 'Ichthyosporea'),
                    ncbi.maybeTaxon('Choanoflagellida', 'Opisthokonta'))

    # Dail 2014-03-31 https://github.com/OpenTreeOfLife/feedback/issues/5
    # updated 2015-06-28 NCBI Katablepharidophyta = SILVA Kathablepharidae.
    # ### CHECK: was silva.taxon
    ott.same(ncbi.taxon('Katablepharidophyta'), ott.taxon('Kathablepharidae'))
    # was: ott.taxon('Katablepharidophyta').hide()

    # probably not needed
    ott.same(ncbi.taxon('Ciliophora', 'Alveolata'), ott.taxon('Ciliophora', 'Alveolata'))

    ott.notSame(ncbi.taxon('Diphylleia', 'Chloroplastida'),
                ott.taxonThatContains('Diphylleia', 'Diphylleia rotans'))

    # 2015-10-06 JAR noticed while debugging surprisingly large number of newly-hidden ids
    # n.b. there are three Ctenophoras: comb jellies, diatom, fly
    ott.notSame(ncbi.taxon('Ctenophora', 'Metazoa'),
                ott.taxon('Ctenophora', 'Stramenopiles')) # diatom from SILVA, WoRMS, GBIF, IRMNG

    return ncbi
# One-off script prepared to provide data to David Hibbett and Romina Gazis.
# Lists numbers of species in each fungal order.

from org.opentreeoflife.smasher import Taxonomy
import csv, sys
from taxonomies import load_fung, load_ncbi, load_gbif, load_irmng

# Taxonomies to report on.  Each entry is a 3-tuple: a short key, the
# loaded taxonomy object, and a human-readable label.
# Note that the load_*() calls and Taxonomy.getTaxonomy() are evaluated
# when this module is loaded, not when main() runs.
taxonomies = [
    ("fung", load_fung(), "Index Fungorum"),
    ("ncbi", load_ncbi(), "NCBI"),
    ("gbif", load_gbif(), "GBIF"),
    ("irmng", load_irmng(), "IRMNG"),
    ("ott", Taxonomy.getTaxonomy("tax/ott/"), "OTT 2.9"),
]


def main():
    """Read the taxon names from "order-counts-orders.csv" and report on them.

    The first row of the CSV file is treated as a header and skipped.  The
    first column of every remaining non-empty row is taken as a taxon name.
    The resulting list always starts with "Fungi" and is handed to
    write_counts().
    """
    taxa = ["Fungi"]
    # "with" guarantees the input file is closed even if reading fails.
    with open("order-counts-orders.csv", "r") as infile:
        reader = csv.reader(infile)
        # Skip the header row.  The builtin next() works on both Python 2.6+
        # and Python 3; the default avoids StopIteration on an empty file.
        next(reader, None)
        for row in reader:
            # csv.reader yields [] for blank lines; skip those instead of
            # failing with IndexError on row[0].
            if row:
                taxa.append(row[0])

    write_counts(taxa)


def write_counts(taxa):
    """Open "order-counts.csv" for writing and wrap it in a csv.writer.

    NOTE(review): only these two statements are visible here -- `taxa` and
    `writer` are never used and `outfile` is never closed.  The definition
    looks truncated; confirm against the complete script.
    """
    # Opening with "w" truncates any existing "order-counts.csv".
    outfile = open("order-counts.csv", "w")
    writer = csv.writer(outfile)
# Example #4
# 0
def prepare_ncbi(ott):
    """Load the NCBI taxonomy and align it to OTT.

    Steps, in order:
      1. Load NCBI with taxonomies.load_ncbi().
      2. Hide the descendants of NCBI 'Fungi' (to rank 'species').
      3. Issue a series of ott.same() / ott.notSame() directives for
         individual taxa (see the dated notes beside each one).

    ott -- the taxonomy being assembled; this function calls its same,
           notSame, taxon and taxonThatContains methods.

    Returns the loaded NCBI taxonomy object.

    NOTE(review): `taxonomies` is used here as an object exposing
    load_ncbi() (presumably an imported module).  Confirm that it is not
    shadowed by a module-level list that is also named `taxonomies`.
    """

    ncbi = taxonomies.load_ncbi()

    # David Hibbett has requested that for Fungi, only Index Fungorum
    # should be seen.  Rather than delete the NCBI fungal taxa, we just
    # mark them 'hidden' so they can be suppressed downstream.  This
    # preserves the identifier assignments, which may have been used
    # somewhere.
    ncbi.taxon('Fungi').hideDescendantsToRank('species')

    # - Alignment to OTT -

    #ott.same(ncbi.taxon('Cyanobacteria'), silva.taxon('D88288/#3'))
    # #### Check - was fungi.taxon
    # ** No unique taxon found with this name: Burkea
    # ** No unique taxon found with this name: Coscinium
    # ** No unique taxon found with this name: Perezia
    # ott.notSame(ncbi.taxon('Burkea', 'Viridiplantae'), ott.taxon('Burkea'))
    # ott.notSame(ncbi.taxon('Coscinium', 'Viridiplantae'), ott.taxon('Coscinium'))
    # ott.notSame(ncbi.taxon('Perezia', 'Viridiplantae'), ott.taxon('Perezia'))

    # JAR 2014-04-11 Discovered during regression testing
    # now handled in other ways
    # ott.notSame(ncbi.taxon('Epiphloea', 'Rhodophyta'), ott.taxon('Epiphloea', 'Ascomycota'))

    # JAR attempt to resolve ambiguous alignment of Trichosporon in IF and
    # NCBI based on common parent and member.
    # Type = T. beigelii, which is current, according to Mycobank.
    # But I'm going to use a different 'type', Trichosporon cutaneum.
    # #### Check - was fungi.taxon
    # The NCBI side is looked up by the string '5552' rather than by name
    # (the name-based lookup is kept below, commented out).
    ott.same(
        ott.taxonThatContains('Trichosporon', 'Trichosporon cutaneum'),
        #ncbi.taxonThatContains('Trichosporon', 'Trichosporon cutaneum')
        ncbi.taxon('5552'))

    # 2014-04-23 In new version of IF - obvious misalignment
    # #### Check - was fungi.taxon
    # ott.notSame(ncbi.taxon('Crepidula', 'Gastropoda'), ott.taxon('Crepidula', 'Microsporidia'))
    # ott.notSame(ncbi.taxon('Hessea', 'Viridiplantae'), ott.taxon('Hessea', 'Microsporidia'))
    # 2014-04-23 Resolve ambiguity introduced into new version of IF
    # http://www.speciesfungorum.org/Names/SynSpecies.asp?RecordID=331593
    # #### Check - was fungi.taxon
    ott.same(ncbi.taxon('Gymnopilus spectabilis var. junonius'),
             ott.taxon('Gymnopilus junonius'))

    # JAR 2014-04-23 More sample contamination in SILVA 115
    # #### Check - was fungi.taxon
    # ott.same(ncbi.taxon('Lamprospora'), ott.taxon('Lamprospora', 'Pyronemataceae'))

    # JAR 2014-04-25
    # ### CHECK: was silva.taxon
    # ott.notSame(ott.taxon('Bostrychia', 'Rhodophyceae'), ncbi.taxon('Bostrychia', 'Aves'))

    # https://github.com/OpenTreeOfLife/feedback/issues/45
    # Disabled: the 'if False' guard means this directive never executes.
    if False:
        ott.notSame(ott.maybeTaxon('Choanoflagellida', 'Ichthyosporea'),
                    ncbi.maybeTaxon('Choanoflagellida', 'Opisthokonta'))

    # Dail 2014-03-31 https://github.com/OpenTreeOfLife/feedback/issues/5
    # updated 2015-06-28 NCBI Katablepharidophyta = SILVA Kathablepharidae.
    # ### CHECK: was silva.taxon
    ott.same(ncbi.taxon('Katablepharidophyta'), ott.taxon('Kathablepharidae'))
    # was: ott.taxon('Katablepharidophyta').hide()

    # probably not needed
    ott.same(ncbi.taxon('Ciliophora', 'Alveolata'),
             ott.taxon('Ciliophora', 'Alveolata'))

    ott.notSame(ncbi.taxon('Diphylleia', 'Chloroplastida'),
                ott.taxonThatContains('Diphylleia', 'Diphylleia rotans'))

    # 2015-10-06 JAR noticed while debugging surprisingly large number of newly-hidden ids
    # n.b. there are three Ctenophoras: comb jellies, diatom, fly
    ott.notSame(
        ncbi.taxon('Ctenophora', 'Metazoa'),
        ott.taxon('Ctenophora',
                  'Stramenopiles'))  # diatom from SILVA, WoRMS, GBIF, IRMNG

    return ncbi