# Requires python.security.respectJavaAccessibility = false
# on java command line or in .jython

from org.opentreeoflife.taxa import Taxonomy
from org.opentreeoflife.smasher import UnionTaxonomy, HomonymReport 

# Module-level setup shared by report() below: create the UnionTaxonomy and
# hand it the skeleton taxonomy loaded from 'tax/skel/'.
# NOTE(review): the second getTaxonomy argument ('skel') is presumably the
# id prefix / short name for the loaded taxonomy -- confirm against Taxonomy.
union = UnionTaxonomy()
skel = Taxonomy.getTaxonomy('tax/skel/', 'skel')
union.setSkeleton(skel)

def report(tax, tag):
    """Produce the homonym report for one taxonomy.

    ``tax`` is first passed, together with the module-level skeleton
    ``skel``, to ``union.markDivisionsFromSkeleton``.  It is then handed to
    ``HomonymReport.homonymReport`` along with the path
    ``reports/<tag>-homonym-report.tsv``.

    tax -- the taxonomy object to report on
    tag -- string used to build the report file name
    """
    union.markDivisionsFromSkeleton(tax, skel)
    report_path = 'reports/' + tag + '-homonym-report.tsv'
    HomonymReport.homonymReport(tax, report_path)

# Hard-wired switch: as written only the first branch runs, reporting on the
# taxonomy loaded from 'tax/ott/'.  The else branch is kept so the condition
# can be flipped by hand to report on each source taxonomy instead.
if True:
    ott = Taxonomy.getTaxonomy('tax/ott/', 'ott')
    report(ott, 'ott')
else:
    # Imported here so the taxonomies module is only needed on this path.
    import taxonomies
    report(taxonomies.loadSilva(), 'silva')
    report(taxonomies.loadH2007(), 'h2007')
    # NOTE(review): the 'worms' report is built from loadFung(), the same
    # loader used for 'if' on the next line.  This looks like a copy/paste
    # slip -- confirm which loader was intended before enabling this branch.
    report(taxonomies.loadFung(), 'worms')
    report(taxonomies.loadFung(), 'if')
    report(taxonomies.loadNcbi(), 'ncbi')
    report(taxonomies.loadGbif(), 'gbif')
    report(taxonomies.loadIrmng(), 'irmng')
from org.opentreeoflife.smasher import Taxonomy
from org.opentreeoflife.smasher import Reportx
import taxonomies

# Module-level setup shared by report() below: create a new taxonomy via
# Taxonomy.newTaxonomy() and hand it the skeleton loaded from 'tax/skel/'.
ott = Taxonomy.newTaxonomy()
skel = Taxonomy.getTaxonomy('tax/skel/', 'skel')
ott.setSkeleton(skel)


def report(tax, tag):
    """Mark divisions on ``tax``, check them, and run the MRCA report.

    The steps run in this order: ``ott.markDivisions``,
    ``taxonomies.checkDivisions``, then ``Reportx.report`` with the name
    ``<tag>-mrca-report.tsv``.

    tax -- the taxonomy object to report on
    tag -- string used to build the report file name
    """
    ott.markDivisions(tax)
    # Reportx.bogotypes(tax) is deliberately left disabled here.
    taxonomies.checkDivisions(tax)
    report_name = tag + '-mrca-report.tsv'
    Reportx.report(tax, report_name)


# Hard-wired switch: as written only the IRMNG report runs.  Flip the
# condition by hand to run the full list of source taxonomies in the else
# branch (which also ends with IRMNG, followed by OTT).
if True:
    report(taxonomies.loadIrmng(), 'irmng')
else:
    silva = taxonomies.loadSilva()
    # Registers SILVA's 'Ctenophora' (the one under 'Coscinodiscophytina')
    # and the skeleton's 'Ctenophora' with ott.notSame before reporting.
    # NOTE(review): presumably this keeps the two homonyms from being
    # matched to each other -- confirm against the notSame implementation.
    ott.notSame(silva.taxon('Ctenophora', 'Coscinodiscophytina'),
                skel.taxon('Ctenophora'))
    report(silva, 'silva')
    report(taxonomies.loadH2007(), 'h2007')
    report(taxonomies.loadFung(), 'if')
    report(taxonomies.loadNcbi(), 'ncbi')
    report(taxonomies.loadGbif(), 'gbif')
    report(taxonomies.loadIrmng(), 'irmng')
    report(taxonomies.loadOtt(), 'ott')
# Example #3
# 0
def doFung():
    """Load Index Fungorum (IF), patch it, and merge it into ``ott``.

    The taxonomy returned by ``taxonomies.loadFung()`` is edited in place
    (moves, prunes, synonyms, renames).  Known false matches against SILVA
    are registered with ``ott.notSame``, and the patched taxonomy is then
    passed to ``ott.absorb``.  Afterwards a handful of orders in ``ott``
    are given their member taxa by hand.

    This function relies on the module-level names ``taxonomies``, ``ott``
    and ``silva``.  None of them is bound here, so all three must already
    be bound when the function is called.

    Returns:
        The patched Index Fungorum taxonomy, i.e. the same object that was
        loaded and passed to ``ott.absorb``.
    """

    allfung = taxonomies.loadFung()

    # A one-argument print(...) parses both as a Python 2 print statement
    # and as a Python 3 call, and prints the same text in either case.
    print("Index Fungorum has %s nodes" % allfung.count())

    # JAR 2014-04-27 JAR found while investigating 'hidden' status of
    # Thelohania butleri.  Move out of Protozoa to prevent their being hidden
    allfung.taxon('Fungi').take(allfung.taxon('Microsporidia'))

    # *** Non-Fungi processing

    # JAR 2014-05-13 Chlorophyte or fungus?  This one is very confused.
    # Pick it up from GBIF if at all
    allfung.taxon('Byssus phosphorea').prune()

    if False:  # see taxonomies.loadFung
        # Work in progress.  By promoting to root we've lost the fact that
        # protozoa are eukaryotes, which is unfortunate.  Not important in this
        # case, but suggestive of deeper problems in the framework.
        # Test case: Oomycota should end up in Stramenopiles.
        for kingdom_name in ('Protozoa', 'Chromista'):
            kingdom = allfung.maybeTaxon(kingdom_name)
            if kingdom is not None:
                kingdom.hide()   # recursive
                kingdom.detach()
                kingdom.elide()

    # IF Thraustochytriidae = SILVA Thraustochytriaceae ?  (Stramenopiles)
    # IF T. 90638 contains Sicyoidochytrium, Schizochytrium, Ulkenia, Thraustochytrium
    #  Parietichytrium, Elina, Botryochytrium, Althornia
    # SILVA T. contains Ulkenia and a few others of these... I say yes.
    # NOTE(review): the note above cites id 90638 but the lookup below uses
    # '90377' -- confirm which id is the intended taxon.
    thraust = allfung.taxon('90377')
    thraust.synonym('Thraustochytriaceae')
    thraust.synonym('Thraustochytriidae')
    thraust.synonym('Thraustochytridae')

    # IF Labyrinthulaceae = SILVA Labyrinthulomycetes ?  NO.
    # IF L. contains only Labyrinthomyxa, Labyrinthula
    # SILVA L. contains a lot more than that.

    # IF Hyphochytriaceae = SILVA Hyphochytriales ?
    # SILVA Hyphochytriales = AB622284/#4 contains only
    # Hypochitrium, Rhizidiomycetaceae

    # There are two Bacillaria.
    # 1. NCBI 3002, in Stramenopiles, contains Bacillaria paxillifer.
    #    No synonyms in NCBI.
    #    IF has Bacillaria as a synonym for Camillea (if:777).
    #    Bacillaria is not otherwise in IF.
    #    Camillea in IF is in Stramenopiles.
    # 2. NCBI 109369, in Pezizomycotina
    #    No synonyms in NCBI.
    # NCBI 13677 = Camillea, a fish.

    # There are two Polyangium, a bacterium (NCBI) and a flatworm (IF).

    # smush folds sibling taxa that have the same name.
    # (repeats - see loadFung()  ???)
    allfung.smush()

    # *** Fungi processing

    print("Fungi in Index Fungorum has %s nodes" % allfung.taxon('Fungi').count())

    # fung = allfung.select(allfung.taxon('Fungi'))
    # SIDE EFFECT.  Ideally there would be a declarative
    # (non-side-effecty) way to do this.
    # allfung.taxon('Fungi').trim()

    # Revert to previous method for the time being due to large chunks of Fungi
    # lacking parent links
    fung = allfung

    # JAR 2014-04-11 Missing in earlier IF, mistake in later IF -
    # extraneous authority string.  See Romina's issue #42
    # This is a fungus.
    cyph = fung.maybeTaxon('Cyphellopsis')
    if cyph is None:
        cyph = fung.maybeTaxon('Cyphellopsis Donk 1931')
        if cyph is not None:
            cyph.rename('Cyphellopsis')
        else:
            cyph = fung.newTaxon('Cyphellopsis', 'genus', 'if:17439')
        # Only re-homed when the plain name was missing to begin with.
        fung.taxon('Niaceae').take(cyph)

    # 2014-03-07 Prevent a false match
    # https://groups.google.com/d/msg/opentreeoflife/5SAPDerun70/fRjA2M6z8tIJ
    # This is a fungus in Pezizomycotina
    ott.notSame(silva.taxon('Phaeosphaeria'), fung.taxon('Phaeosphaeria'))

    # 2014-04-08 This was causing Agaricaceae to be paraphyletic
    ott.notSame(silva.taxon('Morganella'), fung.taxon('Morganella'))

    # 2014-04-08 More IF/SILVA bad matches
    # https://github.com/OpenTreeOfLife/reference-taxonomy/issues/63
    for name in ['Trichoderma harzianum',    # in Pezizomycotina
                 'Acantharia',               # in Pezizomycotina
                 'Bogoriella',               # in Pezizomycotina
                 'Steinia',                  # in Pezizomycotina
                 'Sclerotinia homoeocarpa',  # in Pezizomycotina
                 'Epiphloea',                # in Pezizomycotina
                 'Campanella',               # in Agaricomycotina
                 'Lacrymaria',               # in Agaricomycotina
                 'Frankia',                  # in Pezizomycotina / bacterium in SILVA
                 'Phialina',                 # in Pezizomycotina
                 'Puccinia triticina']:      # in Pucciniomycotina in Fungi
        ott.notSame(silva.taxon(name), fung.taxon(name))

    # Romina 2014-04-09
    # IF has both Hypocrea and Trichoderma.  Hypocrea is the right name.
    # https://github.com/OpenTreeOfLife/reference-taxonomy/issues/86
    fung.taxon('Trichoderma viride').rename('Hypocrea rufa')  # Type
    fung.taxon('Hypocrea').absorb(fung.taxonThatContains('Trichoderma', 'Hypocrea rufa'))

    # Romina https://github.com/OpenTreeOfLife/reference-taxonomy/issues/42
    fung.taxon('Trichoderma deliquescens').rename('Hypocrea lutea')

    # 2014-04-25 JAR
    # There are three Bostrychias: a rhodophyte, a fungus, and a bird.
    # The fungus name is a synonym for Cytospora.
    if fung.maybeTaxon('Bostrychia', 'Ascomycota') is not None:
        ott.notSame(silva.taxon('Bostrychia', 'Rhodophyceae'),
                    fung.taxon('Bostrychia', 'Ascomycota'))

    # analyzeMajorRankConflicts sets the "major_rank_conflict" flag when
    # intermediate ranks are missing (e.g. a family that's a child of a
    # class)
    fung.analyzeMajorRankConflicts()

    ott.absorb(fung)

    print("Fungi in h2007 + if has %s nodes" % ott.taxon('Fungi').count())

    # https://github.com/OpenTreeOfLife/reference-taxonomy/issues/20
    # Problem: Chlamydotomus is an incertae sedis child of Fungi.  Need to
    # find a good home for it.
    #
    # Mycobank says Chlamydotomus beigelii = Trichosporon beigelii:
    # http://www.mycobank.org/BioloMICS.aspx?Link=T&TableKey=14682616000000067&Rec=35058&Fields=All
    #
    # IF says the basionym is Pleurococcus beigelii, and P. beigelii's current name
    # is Geotrichum beigelii.  IF says the type for Trichosporon is Trichosporon beigelii,
    # and that T. beigelii's current name is Trichosporum beigelii... with no synonymies...
    # So IF does not corroborate Mycobank.
    #
    # So we could consider absorbing Chlamydotomus into Trichosporon.  But...
    #
    # Not sure about this.  beigelii has a sister, cellaris, that should move along
    # with it, but the name Trichosporon cellaris has never been published.
    # Cb = ott.taxon('Chlamydotomus beigelii')
    # Cb.rename('Trichosporon beigelii')
    # ott.taxon('Trichosporon').take(Cb)
    #
    # Just make it incertae sedis and put off dealing with it until someone cares...

    # 2014-04-13 Romina #40, #60
    # Each entry is (order name, names of the taxa to place under it).
    for order_name, member_names in [
            ('Neozygitales', ['Neozygitaceae']),
            ('Asteriniales', ['Asterinaceae']),
            ('Savoryellales', ['Savoryella', 'Ascotaiwania', 'Ascothailandia']),
            ('Cladochytriales', ['Cladochytriaceae', 'Nowakowskiellaceae',
                                 'Septochytriaceae', 'Endochytriaceae']),
            ('Jaapiales', ['Jaapiaceae']),
            ('Coniocybales', ['Coniocybaceae']),
            ('Hyaloraphidiales', ['Hyaloraphidiaceae']),
            ('Mytilinidiales', ['Mytilinidiaceae', 'Gloniaceae'])]:
        order = ott.taxon(order_name)
        for member_name in member_names:
            order.take(ott.taxon(member_name))

    # NOTE(review): the lines below look like messages captured from an
    # earlier run of this code -- confirm they are still current.
    # ** No taxon found with this name: Nowakowskiellaceae
    # ** No taxon found with this name: Septochytriaceae
    # ** No taxon found with this name: Jaapiaceae
    # ** (null=if:81865 Rhizocarpaceae) is already a child of (null=h2007:212 Rhizocarpales)
    # ** No taxon found with this name: Hyaloraphidiaceae

    return fung
# Example #4
# 0
def doFung():
    """Load Index Fungorum (IF), patch it, and merge it into ``ott``.

    The taxonomy returned by ``taxonomies.loadFung()`` is edited in place
    (moves, prunes, synonyms, renames).  Known false matches against SILVA
    are registered with ``ott.notSame``, and the patched taxonomy is then
    passed to ``ott.absorb``.  Afterwards a handful of orders in ``ott``
    are given their member taxa by hand.

    This function relies on the module-level names ``taxonomies``, ``ott``
    and ``silva``.  None of them is bound here, so all three must already
    be bound when the function is called.

    Returns:
        The patched Index Fungorum taxonomy, i.e. the same object that was
        loaded and passed to ``ott.absorb``.
    """

    allfung = taxonomies.loadFung()

    # A one-argument print(...) parses both as a Python 2 print statement
    # and as a Python 3 call, and prints the same text in either case.
    print("Index Fungorum has %s nodes" % allfung.count())

    # JAR 2014-04-27 JAR found while investigating 'hidden' status of
    # Thelohania butleri.  Move out of Protozoa to prevent their being hidden
    allfung.taxon('Fungi').take(allfung.taxon('Microsporidia'))

    # *** Non-Fungi processing

    # JAR 2014-05-13 Chlorophyte or fungus?  This one is very confused.
    # Pick it up from GBIF if at all
    allfung.taxon('Byssus phosphorea').prune()

    if False:  # see taxonomies.loadFung
        # Work in progress.  By promoting to root we've lost the fact that
        # protozoa are eukaryotes, which is unfortunate.  Not important in this
        # case, but suggestive of deeper problems in the framework.
        # Test case: Oomycota should end up in Stramenopiles.
        for kingdom_name in ('Protozoa', 'Chromista'):
            kingdom = allfung.maybeTaxon(kingdom_name)
            if kingdom is not None:
                kingdom.hide()   # recursive
                kingdom.detach()
                kingdom.elide()

    # IF Thraustochytriidae = SILVA Thraustochytriaceae ?  (Stramenopiles)
    # IF T. 90638 contains Sicyoidochytrium, Schizochytrium, Ulkenia, Thraustochytrium
    #  Parietichytrium, Elina, Botryochytrium, Althornia
    # SILVA T. contains Ulkenia and a few others of these... I say yes.
    # NOTE(review): the note above cites id 90638 but the lookup below uses
    # '90377' -- confirm which id is the intended taxon.
    thraust = allfung.taxon('90377')
    thraust.synonym('Thraustochytriaceae')
    thraust.synonym('Thraustochytriidae')
    thraust.synonym('Thraustochytridae')

    # IF Labyrinthulaceae = SILVA Labyrinthulomycetes ?  NO.
    # IF L. contains only Labyrinthomyxa, Labyrinthula
    # SILVA L. contains a lot more than that.

    # IF Hyphochytriaceae = SILVA Hyphochytriales ?
    # SILVA Hyphochytriales = AB622284/#4 contains only
    # Hypochitrium, Rhizidiomycetaceae

    # There are two Bacillaria.
    # 1. NCBI 3002, in Stramenopiles, contains Bacillaria paxillifer.
    #    No synonyms in NCBI.
    #    IF has Bacillaria as a synonym for Camillea (if:777).
    #    Bacillaria is not otherwise in IF.
    #    Camillea in IF is in Stramenopiles.
    # 2. NCBI 109369, in Pezizomycotina
    #    No synonyms in NCBI.
    # NCBI 13677 = Camillea, a fish.

    # There are two Polyangium, a bacterium (NCBI) and a flatworm (IF).

    # smush folds sibling taxa that have the same name.
    # (repeats - see loadFung()  ???)
    allfung.smush()

    # *** Fungi processing

    print("Fungi in Index Fungorum has %s nodes" % allfung.taxon('Fungi').count())

    # fung = allfung.select(allfung.taxon('Fungi'))
    # SIDE EFFECT.  Ideally there would be a declarative
    # (non-side-effecty) way to do this.
    # allfung.taxon('Fungi').trim()

    # Revert to previous method for the time being due to large chunks of Fungi
    # lacking parent links
    fung = allfung

    # JAR 2014-04-11 Missing in earlier IF, mistake in later IF -
    # extraneous authority string.  See Romina's issue #42
    # This is a fungus.
    cyph = fung.maybeTaxon('Cyphellopsis')
    if cyph is None:
        cyph = fung.maybeTaxon('Cyphellopsis Donk 1931')
        if cyph is not None:
            cyph.rename('Cyphellopsis')
        else:
            cyph = fung.newTaxon('Cyphellopsis', 'genus', 'if:17439')
        # Only re-homed when the plain name was missing to begin with.
        fung.taxon('Niaceae').take(cyph)

    # 2014-03-07 Prevent a false match
    # https://groups.google.com/d/msg/opentreeoflife/5SAPDerun70/fRjA2M6z8tIJ
    # This is a fungus in Pezizomycotina
    ott.notSame(silva.taxon('Phaeosphaeria'), fung.taxon('Phaeosphaeria'))

    # 2014-04-08 This was causing Agaricaceae to be paraphyletic
    ott.notSame(silva.taxon('Morganella'), fung.taxon('Morganella'))

    # 2014-04-08 More IF/SILVA bad matches
    # https://github.com/OpenTreeOfLife/reference-taxonomy/issues/63
    for name in ['Trichoderma harzianum',    # in Pezizomycotina
                 'Acantharia',               # in Pezizomycotina
                 'Bogoriella',               # in Pezizomycotina
                 'Steinia',                  # in Pezizomycotina
                 'Sclerotinia homoeocarpa',  # in Pezizomycotina
                 'Epiphloea',                # in Pezizomycotina
                 'Campanella',               # in Agaricomycotina
                 'Lacrymaria',               # in Agaricomycotina
                 'Frankia',                  # in Pezizomycotina / bacterium in SILVA
                 'Phialina',                 # in Pezizomycotina
                 'Puccinia triticina']:      # in Pucciniomycotina in Fungi
        ott.notSame(silva.taxon(name), fung.taxon(name))

    # Romina 2014-04-09
    # IF has both Hypocrea and Trichoderma.  Hypocrea is the right name.
    # https://github.com/OpenTreeOfLife/reference-taxonomy/issues/86
    fung.taxon('Trichoderma viride').rename('Hypocrea rufa')  # Type
    fung.taxon('Hypocrea').absorb(fung.taxonThatContains('Trichoderma', 'Hypocrea rufa'))

    # Romina https://github.com/OpenTreeOfLife/reference-taxonomy/issues/42
    fung.taxon('Trichoderma deliquescens').rename('Hypocrea lutea')

    # 2014-04-25 JAR
    # There are three Bostrychias: a rhodophyte, a fungus, and a bird.
    # The fungus name is a synonym for Cytospora.
    if fung.maybeTaxon('Bostrychia', 'Ascomycota') is not None:
        ott.notSame(silva.taxon('Bostrychia', 'Rhodophyceae'),
                    fung.taxon('Bostrychia', 'Ascomycota'))

    # analyzeMajorRankConflicts sets the "major_rank_conflict" flag when
    # intermediate ranks are missing (e.g. a family that's a child of a
    # class)
    fung.analyzeMajorRankConflicts()

    ott.absorb(fung)

    print("Fungi in h2007 + if has %s nodes" % ott.taxon('Fungi').count())

    # https://github.com/OpenTreeOfLife/reference-taxonomy/issues/20
    # Problem: Chlamydotomus is an incertae sedis child of Fungi.  Need to
    # find a good home for it.
    #
    # Mycobank says Chlamydotomus beigelii = Trichosporon beigelii:
    # http://www.mycobank.org/BioloMICS.aspx?Link=T&TableKey=14682616000000067&Rec=35058&Fields=All
    #
    # IF says the basionym is Pleurococcus beigelii, and P. beigelii's current name
    # is Geotrichum beigelii.  IF says the type for Trichosporon is Trichosporon beigelii,
    # and that T. beigelii's current name is Trichosporum beigelii... with no synonymies...
    # So IF does not corroborate Mycobank.
    #
    # So we could consider absorbing Chlamydotomus into Trichosporon.  But...
    #
    # Not sure about this.  beigelii has a sister, cellaris, that should move along
    # with it, but the name Trichosporon cellaris has never been published.
    # Cb = ott.taxon('Chlamydotomus beigelii')
    # Cb.rename('Trichosporon beigelii')
    # ott.taxon('Trichosporon').take(Cb)
    #
    # Just make it incertae sedis and put off dealing with it until someone cares...

    # 2014-04-13 Romina #40, #60
    # Each entry is (order name, names of the taxa to place under it).
    for order_name, member_names in [
            ('Neozygitales', ['Neozygitaceae']),
            ('Asteriniales', ['Asterinaceae']),
            ('Savoryellales', ['Savoryella', 'Ascotaiwania', 'Ascothailandia']),
            ('Cladochytriales', ['Cladochytriaceae', 'Nowakowskiellaceae',
                                 'Septochytriaceae', 'Endochytriaceae']),
            ('Jaapiales', ['Jaapiaceae']),
            ('Coniocybales', ['Coniocybaceae']),
            ('Hyaloraphidiales', ['Hyaloraphidiaceae']),
            ('Mytilinidiales', ['Mytilinidiaceae', 'Gloniaceae'])]:
        order = ott.taxon(order_name)
        for member_name in member_names:
            order.take(ott.taxon(member_name))

    # NOTE(review): the lines below look like messages captured from an
    # earlier run of this code -- confirm they are still current.
    # ** No taxon found with this name: Nowakowskiellaceae
    # ** No taxon found with this name: Septochytriaceae
    # ** No taxon found with this name: Jaapiaceae
    # ** (null=if:81865 Rhizocarpaceae) is already a child of (null=h2007:212 Rhizocarpales)
    # ** No taxon found with this name: Hyaloraphidiaceae

    return fung
# Requires python.security.respectJavaAccessibility = false
# on java command line or in .jython

from org.opentreeoflife.taxa import Taxonomy
from org.opentreeoflife.smasher import UnionTaxonomy, HomonymReport

# Module-level setup shared by report() below: create the UnionTaxonomy and
# hand it the skeleton taxonomy loaded from 'tax/skel/'.
# NOTE(review): the second getTaxonomy argument ('skel') is presumably the
# id prefix / short name for the loaded taxonomy -- confirm against Taxonomy.
union = UnionTaxonomy()
skel = Taxonomy.getTaxonomy('tax/skel/', 'skel')
union.setSkeleton(skel)


def report(tax, tag):
    """Produce the homonym report for one taxonomy.

    ``tax`` is first passed, together with the module-level skeleton
    ``skel``, to ``union.markDivisionsFromSkeleton``.  It is then handed to
    ``HomonymReport.homonymReport`` along with the path
    ``reports/<tag>-homonym-report.tsv``.

    tax -- the taxonomy object to report on
    tag -- string used to build the report file name
    """
    union.markDivisionsFromSkeleton(tax, skel)
    report_path = 'reports/' + tag + '-homonym-report.tsv'
    HomonymReport.homonymReport(tax, report_path)


# Hard-wired switch: as written only the first branch runs, reporting on the
# taxonomy loaded from 'tax/ott/'.  The else branch is kept so the condition
# can be flipped by hand to report on each source taxonomy instead.
if True:
    ott = Taxonomy.getTaxonomy('tax/ott/', 'ott')
    report(ott, 'ott')
else:
    # Imported here so the taxonomies module is only needed on this path.
    import taxonomies
    report(taxonomies.loadSilva(), 'silva')
    report(taxonomies.loadH2007(), 'h2007')
    # NOTE(review): the 'worms' report is built from loadFung(), the same
    # loader used for 'if' on the next line.  This looks like a copy/paste
    # slip -- confirm which loader was intended before enabling this branch.
    report(taxonomies.loadFung(), 'worms')
    report(taxonomies.loadFung(), 'if')
    report(taxonomies.loadNcbi(), 'ncbi')
    report(taxonomies.loadGbif(), 'gbif')
    report(taxonomies.loadIrmng(), 'irmng')
from org.opentreeoflife.smasher import Taxonomy
from org.opentreeoflife.smasher import Reportx
import taxonomies

# Module-level setup shared by report() below: create a new taxonomy via
# Taxonomy.newTaxonomy() and hand it the skeleton loaded from 'tax/skel/'.
ott = Taxonomy.newTaxonomy()
skel = Taxonomy.getTaxonomy('tax/skel/', 'skel')
ott.setSkeleton(skel)

def report(tax, tag):
    """Mark divisions on ``tax``, check them, and run the MRCA report.

    The steps run in this order: ``ott.markDivisions``,
    ``taxonomies.checkDivisions``, then ``Reportx.report`` with the name
    ``<tag>-mrca-report.tsv``.

    tax -- the taxonomy object to report on
    tag -- string used to build the report file name
    """
    ott.markDivisions(tax)
    # Reportx.bogotypes(tax) is deliberately left disabled here.
    taxonomies.checkDivisions(tax)
    report_name = tag + '-mrca-report.tsv'
    Reportx.report(tax, report_name)

# Hard-wired switch: as written only the IRMNG report runs.  Flip the
# condition by hand to run the full list of source taxonomies in the else
# branch (which also ends with IRMNG, followed by OTT).
if True:
	report(taxonomies.loadIrmng(), 'irmng')
else:
	silva = taxonomies.loadSilva()
	# Registers SILVA's 'Ctenophora' (the one under 'Coscinodiscophytina')
	# and the skeleton's 'Ctenophora' with ott.notSame before reporting.
	# NOTE(review): presumably this keeps the two homonyms from being
	# matched to each other -- confirm against the notSame implementation.
	ott.notSame(silva.taxon('Ctenophora', 'Coscinodiscophytina'),
				skel.taxon('Ctenophora'))
	report(silva, 'silva')
	report(taxonomies.loadH2007(), 'h2007')
	report(taxonomies.loadFung(), 'if')
	report(taxonomies.loadNcbi(), 'ncbi')
	report(taxonomies.loadGbif(), 'gbif')
	report(taxonomies.loadIrmng(), 'irmng')
	report(taxonomies.loadOtt(), 'ott')