def gapToTIR_flat(sbItem):
    """Flatten a ScienceBase GAP species item into a TIR registration record.

    The returned dict embeds the original item under ``sbdoc``, is stamped
    with the current UTC time, takes the scientific and common names from the
    item's tags, and copies every identifier onto the record keyed by the
    identifier's ``type``.  The first character of the ``GAP_SpeciesCode``
    identifier selects the taxonomic group.

    Args:
        sbItem: dict with at least ``tags`` (each having ``scheme`` and
            ``name``) and ``identifiers`` (each having ``type`` and ``key``).

    Returns:
        dict: the flattened registration record.

    Raises:
        KeyError: if ``tags``/``identifiers`` are missing, or the species
            code starts with a character other than m, b, a or r.
    """
    from datetime import datetime
    from bis import bis

    # First letter of a GAP species code -> taxonomic group name.
    groupByCodePrefix = {
        "m": "mammals",
        "b": "birds",
        "a": "amphibians",
        "r": "reptiles",
    }
    scientificNameScheme = "https://www.sciencebase.gov/vocab/bis/tir/scientificname"
    commonNameScheme = "https://www.sciencebase.gov/vocab/bis/tir/commonname"

    newItem = {
        "sbdoc": sbItem,
        "source": "GAP Species",
        "registrationDate": datetime.utcnow().isoformat(),
        "followTaxonomy": False,
        "taxonomicLookupProperty": "tsn",
    }

    for tag in sbItem["tags"]:
        scheme = tag["scheme"]
        if scheme == scientificNameScheme:
            newItem["scientificname"] = tag["name"]
        elif scheme == commonNameScheme:
            # Only the common name is passed through the shared cleaner.
            newItem["commonname"] = bis.stringCleaning(tag["name"])

    for identifier in sbItem["identifiers"]:
        idKey = identifier["key"]
        idType = identifier["type"]
        newItem[idType] = idKey
        if idType == "GAP_SpeciesCode":
            newItem["taxonomicgroup"] = groupByCodePrefix[idKey[:1]]

    return newItem
def getTESSSearchURL(queryType, criteria):
    """Build the TESS XQuery URL that matches ``queryType`` against ``criteria``.

    Args:
        queryType: name of the SPECIES_DETAIL property to match on.  The
            value ``"TSN"`` is special-cased: its criteria is sent unquoted.
        criteria: value to match.  For ``"TSN"`` it may be an int or a str;
            for any other query type it is cleaned with
            ``bis.stringCleaning`` and wrapped in double quotes.

    Returns:
        str: the full query URL.
    """
    if queryType == "TSN":
        # TSNs are often carried as ints; coerce so the concatenation below
        # cannot raise TypeError (getBISONSearchURL does the same).
        criteria = str(criteria)
    else:
        # Imported here because only the string branch needs the cleaner.
        from bis import bis
        criteria = '"' + bis.stringCleaning(criteria) + '"'

    return (
        "https://ecos.fws.gov/ecp0/TessQuery?request=query&xquery=/SPECIES_DETAIL["
        + queryType + "=" + criteria + "]"
    )
def getBISONSearchURL(queryType, criteria):
    """Build a single-result BISON search URL.

    Args:
        queryType: ``"TSN"`` searches by TSN; any other value searches by
            scientific name.
        criteria: the TSN (stringified with ``str``) or the scientific name
            (passed through ``bis.stringCleaning``).

    Returns:
        str: the BISON search API URL.
    """
    from bis import bis

    endpoint = "https://bison.usgs.gov/api/search.json?count=1&"

    if queryType == "TSN":
        return endpoint + "tsn=" + str(criteria)

    cleanedName = bis.stringCleaning(criteria)
    return endpoint + "type=scientific_name&species=" + cleanedName
def packageITISJSON(itisDoc):
    """Package an ITIS document into a trimmed, cache-ready dict.

    The result always carries a ``cacheDate`` (current UTC time, ISO format).
    When ``itisDoc`` is a document rather than an int, the result also holds
    the document's properties minus the ones not worth caching, plus:

    * ``taxonomy``: list of ``{"rank", "name"}`` dicts parsed from the first
      entry of ``hierarchySoFarWRanks``;
    * ``hierarchy``: list of names parsed from the first entry of
      ``hierarchySoFar``;
    * ``commonnames``: list of ``{"name", "language"}`` dicts parsed from
      ``vernacular`` (only when that property is present).

    The caller's ``itisDoc`` is left untouched; earlier versions popped keys
    out of it in place.

    Args:
        itisDoc: the ITIS document dict, or an int when there is no document
            to package (NOTE(review): presumably a status/count value from
            the caller -- confirm).

    Returns:
        dict: the packaged ITIS data.

    Raises:
        KeyError: if a document lacks ``hierarchySoFarWRanks`` or
            ``hierarchySoFar``.
    """
    from datetime import datetime

    itisData = {"cacheDate": datetime.utcnow().isoformat()}

    if isinstance(itisDoc, int):
        return itisData

    # Properties we do not want/need to cache, plus the raw properties that
    # are replaced below by cleaner structures.
    keysToDrop = {
        "_version_", "credibilityRating", "expert", "geographicDivision",
        "hierarchicalSort", "hierarchyTSN", "jurisdiction", "publication",
        "rankID", "otherSource", "taxonAuthor", "comment",
        "hierarchySoFarWRanks", "hierarchySoFar", "vernacular",
    }
    # Work on a filtered copy so the caller's document is not mutated.
    packaged = {
        key: value for key, value in itisDoc.items() if key not in keysToDrop
    }

    # Make a clean structure of the taxonomic hierarchy.  Everything after the
    # first ":$" (minus the trailing character) is a "$"-separated run of
    # "Rank:Name" pairs.
    rankedHierarchy = itisDoc["hierarchySoFarWRanks"][0]
    packaged["taxonomy"] = []
    for rankEntry in rankedHierarchy[rankedHierarchy.find(":$") + 2:-1].split("$"):
        parts = rankEntry.split(":")
        packaged["taxonomy"].append({"rank": parts[0], "name": parts[1]})

    # Make a clean, usable list of the hierarchy so far for display or listing.
    packaged["hierarchy"] = itisDoc["hierarchySoFar"][0].split(
        ":")[1][1:-1].split("$")

    # Make a clean structure of common names.
    if "vernacular" in itisDoc:
        # Imported here because only common names need the cleaner.
        from bis import bis
        packaged["commonnames"] = []
        for commonName in itisDoc["vernacular"]:
            fields = commonName.split("$")
            packaged["commonnames"].append({
                "name": bis.stringCleaning(fields[1]),
                "language": fields[2],
            })

    # Add the packaged ITIS doc to the ITIS data structure and return.
    itisData.update(packaged)
    return itisData
def packageWoRMSJSON(matchMethod, matchString, wormsDoc):
    """Package a WoRMS document, with its match details, into a cache-ready dict.

    The result always carries ``cacheDate`` (current UTC time, ISO format),
    ``MatchMethod`` and ``MatchString`` (cleaned with ``bis.stringCleaning``).
    When ``wormsDoc`` is a document rather than an int, its properties are
    merged in, except the ones not worth caching.

    The caller's ``wormsDoc`` is left untouched; earlier versions popped keys
    out of it in place.

    Args:
        matchMethod: label describing how the match was made.
        matchString: the string that was matched against WoRMS.
        wormsDoc: the WoRMS document dict, or an int when there is no
            document to package (NOTE(review): presumably a status value
            from the caller -- confirm).

    Returns:
        dict: the packaged WoRMS data.
    """
    from datetime import datetime
    from bis import bis

    wormsData = {
        "cacheDate": datetime.utcnow().isoformat(),
        "MatchMethod": matchMethod,
        "MatchString": bis.stringCleaning(matchString),
    }

    if not isinstance(wormsDoc, int):
        # WoRMS properties that we don't want/need to cache.
        keysToDrop = {"authority", "citation", "valid_authority", "url"}
        # Merge a filtered view instead of popping from the caller's dict.
        wormsData.update({
            key: value
            for key, value in wormsDoc.items()
            if key not in keysToDrop
        })

    return wormsData
def getSGCNStatesByYear(baseURL, scientificname):
    """Fetch, per SGCN year, the states that listed ``scientificname``.

    Sends a SQL query to the API at ``baseURL`` and returns the
    ``properties`` of every feature in the JSON response.

    Args:
        baseURL: API URL the query is appended to as ``&q=<sql>``, so it must
            already contain a query string.
        scientificname: submitted scientific name to look up; it is passed
            through ``bis.stringCleaning`` before being embedded in the SQL.

    Returns:
        list[dict] | None: one ``properties`` dict per returned row (with the
        selected ``sgcn_year`` and ``states`` columns), or ``None`` when the
        response has no features.
    """
    import requests
    from bis import bis

    # NOTE(review): the SQL is assembled by string concatenation; this relies
    # on bis.stringCleaning to neutralise quotes in the name -- confirm.
    q = "SELECT sgcn_year, array_to_string(array_agg(sgcn_state), ',') states FROM sgcn.sgcn WHERE scientificname_submitted = '" + bis.stringCleaning(
        scientificname) + "' GROUP BY sgcn_year"
    r = requests.get(baseURL + "&q=" + q).json()

    features = r["features"]
    if not features:
        # Explicit "nothing found" result instead of falling off the end.
        return None

    return [feature["properties"] for feature in features]
def getSGCNTaxonomicGroup(baseURL, scientificname):
    """Look up the submitted taxonomic group recorded for ``scientificname``.

    Sends a SQL query to the API at ``baseURL`` asking for the non-empty
    ``taxonomicgroup_submitted`` value with the earliest ``dateinserted``.

    Args:
        baseURL: API URL the query is appended to as ``&q=<sql>``.
        scientificname: submitted scientific name; cleaned with
            ``bis.stringCleaning`` before being embedded in the SQL.

    Returns:
        The ``taxonomicgroup_submitted`` value of the first returned feature,
        or ``None`` when the response has no features.
    """
    import requests
    from bis import bis

    cleanedName = bis.stringCleaning(scientificname)
    sql = (
        "SELECT taxonomicgroup_submitted FROM sgcn.sgcn WHERE scientificname_submitted = '"
        + cleanedName
        + "' AND taxonomicgroup_submitted <> '' ORDER BY dateinserted ASC LIMIT 1"
    )
    response = requests.get(baseURL + "&q=" + sql).json()

    if len(response["features"]) > 0:
        firstMatch = response["features"][0]
        return firstMatch["properties"]["taxonomicgroup_submitted"]

    return None
thisRecord["itis"]["MatchMethod"] = "Fuzzy Match" thisRecord["worms"] = json.loads( recordToSearch["features"][0]["properties"]["worms"]) thisRecord["sgcn"] = json.loads( recordToSearch["features"][0]["properties"]["sgcn"]) tirCommon = {} tirCommon["cachedate"] = datetime.utcnow().isoformat() tirCommon["tirid"] = recordToSearch["features"][0]["properties"]["id"] tirCommon["tirapi"] = thisRun[ "readAPI"] + "?q=SELECT * FROM tir.tir WHERE id=" + str( tirCommon["tirid"]) tirCommon["registeredname"] = bis.stringCleaning( thisRecord["registration"]["scientificname"]) tirCommon["scientificname"] = tirCommon["registeredname"] tirCommon["commonname"] = None tirCommon["taxonomicgroup"] = "Other" tirCommon["taxonomicrank"] = "Unknown Taxonomic Rank" tirCommon["matchmethod"] = "Not Matched" tirCommon[ "acceptedauthorityapi"] = "Not Matched to Taxonomic Authority" tirCommon[ "acceptedauthorityurl"] = "Not Matched to Taxonomic Authority" if thisRecord["itis"]["MatchMethod"] != "Not Matched": tirCommon["scientificname"] = thisRecord["itis"]["nameWInd"] tirCommon["matchmethod"] = thisRecord["itis"]["MatchMethod"] tirCommon["taxonomicrank"] = thisRecord["itis"]["rank"]
tirRecord["properties"]["registration"]) thisRecord["itis"] = json.loads(tirRecord["properties"]["itis"]) thisRecord["worms"] = json.loads(tirRecord["properties"]["worms"]) if tirRecord["properties"]["sgcn"] is not None: thisRecord["sgcn"] = json.loads(tirRecord["properties"]["sgcn"]) _source = thisRecord["registration"]["source"] tirCommon = {} tirCommon["commonname"] = None tirCommon["authorityid"] = None tirCommon["rank"] = None tirCommon["matchmethod"] = None tirCommon["taxonomicgroup"] = None tirCommon["cachedate"] = datetime.utcnow().isoformat() tirCommon["scientificname"] = bis.stringCleaning( thisRecord["registration"]["scientificname"]) tirCommon["source"] = thisRecord["registration"]["source"] tirCommon["matchmethod"] = "Not Matched" tirCommon["authorityid"] = "Not Matched to Taxonomic Authority" tirCommon["rank"] = "Unknown Taxonomic Rank" if thisRecord["itis"]["MatchMethod"] != "Not Matched": tirCommon["scientificname"] = thisRecord["itis"]["nameWInd"] tirCommon["matchmethod"] = thisRecord["itis"]["MatchMethod"] tirCommon[ "authorityid"] = "http://services.itis.gov/?q=tsn:" + str( thisRecord["itis"]["tsn"]) tirCommon["rank"] = thisRecord["itis"]["rank"] elif thisRecord["worms"]["MatchMethod"] != "Not Matched": tirCommon["scientificname"] = thisRecord["worms"]["valid_name"] tirCommon["matchmethod"] = thisRecord["worms"]["MatchMethod"]
thisRecord["scientificname_submitted"] = "" else: thisRecord["scientificname_submitted"] = row["scientific name"] if index in duplicateNamesList: duplicateCount = duplicateCount + 1 if thisRecord["scientificname_submitted"] == "": thisRecord["scientificname_submitted"] = thisRecord[ "sgcn_state"] + " unnamed species " + str( duplicateCount) else: thisRecord["scientificname_submitted"] = thisRecord[ "scientificname_submitted"] + " " + str(duplicateCount) # Do basic cleaning on the name string in order to get it into the database thisRecord["scientificname_submitted"] = bis.stringCleaning( thisRecord["scientificname_submitted"]) if type(row['common name']) is float: thisRecord["commonname_submitted"] = "" else: thisRecord["commonname_submitted"] = bis.stringCleaning( row['common name']) thisRecord["taxonomicgroup_submitted"] = "" if 'taxonomy group' in stateData.columns: thisRecord["taxonomicgroup_submitted"] = bis.stringCleaning( row['taxonomy group']) elif 'taxonomic category' in stateData.columns: thisRecord["taxonomicgroup_submitted"] = bis.stringCleaning( row['taxonomic category'])
def queryTESS(queryType=None, criteria=None):
    """Query the TESS XQuery service and package the species record found.

    Args:
        queryType: name of the SPECIES_DETAIL property to match on.  The
            value ``"TSN"`` is sent unquoted; any other type has its
            criteria wrapped in double quotes.
        criteria: value to match (an int is accepted for ``"TSN"``).

    Returns:
        dict: always contains ``dateCached``, ``queryType``, ``criteria`` and
        ``result``.  ``result`` stays ``False`` when either argument is
        ``None`` or the response has no usable ``results`` element.  On a
        match the record also holds the species properties and a
        ``listingStatus`` list with one entry per listing designation.

    Raises:
        KeyError: if a matched record lacks one of the mandatory properties
            (for example ``STATUS_TEXT`` or ``COMNAME``).
    """
    import requests
    import xmltodict
    from datetime import datetime
    from bis import bis

    # These properties in TESS data often contain single quotes or other
    # characters that need to be escaped in order for the resulting data to be
    # inserted into databases like PostgreSQL.
    keysToClean = ["COMNAME", "INVNAME"]
    listingStatusKeys = [
        "STATUS_TEXT", "LISTING_DATE", "POP_ABBREV", "POP_DESC"
    ]

    def buildListingStatus(record):
        """Return the listing-status entry for one SPECIES_DETAIL record."""
        status = {"STATUS": record["STATUS_TEXT"]}
        # If a species is not actually listed, there will not be a listing date.
        if "LISTING_DATE" in record:
            status["LISTING_DATE"] = record["LISTING_DATE"]
        # Population description information can be missing from TESS records
        # (tessQuery guards against this too).
        for popKey in ("POP_DESC", "POP_ABBREV"):
            if popKey in record:
                status[popKey] = bis.stringCleaning(record[popKey])
        return status

    tessData = {}
    tessData["dateCached"] = datetime.utcnow().isoformat()
    tessData["queryType"] = queryType
    tessData["criteria"] = criteria
    tessData["result"] = False

    if queryType is None or criteria is None:
        return tessData

    if queryType != "TSN":
        # The XQuery service from TESS wants string values in quotes.
        criteria = '"' + criteria + '"'
    else:
        # TSNs may arrive as ints; coerce so the concatenation cannot fail.
        criteria = str(criteria)

    # Query the TESS XQuery service using queryType and criteria arguments.
    queryURL = "https://ecos.fws.gov/ecp0/TessQuery?request=query&xquery=/SPECIES_DETAIL[" + queryType + "=" + criteria + "]"
    tessXML = requests.get(queryURL).text

    # Build an unordered dict from the TESS XML response (we don't care about
    # ordering for our purposes here).
    tessDict = xmltodict.parse(tessXML, dict_constructor=dict)

    # A response without a "results" element, or with an empty one, is
    # reported as "no result" rather than raising.
    results = tessDict.get("results")
    if results is None:
        return tessData

    speciesDetail = results["SPECIES_DETAIL"]

    if isinstance(speciesDetail, list):
        # More than one listing designation: species-level properties come
        # from the first record, listing statuses from every record.
        first = speciesDetail[0]
        tessData["result"] = True
        for key in ("ENTITY_ID", "SPCODE", "VIPCODE", "DPS", "COUNTRY"):
            tessData[key] = first[key]
        tessData["INVNAME"] = bis.stringCleaning(first["INVNAME"])
        tessData["SCINAME"] = first["SCINAME"]
        tessData["COMNAME"] = bis.stringCleaning(first["COMNAME"])
        # Refuge occurrence is not present on every record.
        if "REFUGE_OCCURRENCE" in first:
            tessData["REFUGE_OCCURRENCE"] = first["REFUGE_OCCURRENCE"]
        tessData["FAMILY"] = first["FAMILY"]
        tessData["TSN"] = first["TSN"]
        tessData["listingStatus"] = [
            buildListingStatus(record) for record in speciesDetail
        ]

    elif isinstance(speciesDetail, dict):
        # Single listing status: clean the problematic strings, build the one
        # status entry, then carry every remaining property over as-is.
        tessData["result"] = True
        for key in keysToClean:
            tessData[key] = bis.stringCleaning(speciesDetail[key])
        tessData["listingStatus"] = [buildListingStatus(speciesDetail)]

        alreadyHandled = set(keysToClean) | set(listingStatusKeys)
        tessData.update({
            key: value
            for key, value in speciesDetail.items()
            if key not in alreadyHandled
        })

    return tessData
def tessQuery(queryurl):
    """Fetch a TESS XQuery URL and package the species record it returns.

    Args:
        queryurl: complete TESS XQuery URL to request.

    Returns:
        dict: always contains ``cacheDate`` and ``result``.  ``result`` stays
        ``False`` when the response has no ``results`` element or an empty
        one.  On a match the record also holds the species properties and a
        ``listingStatus`` list with one entry per listing designation.

    Raises:
        KeyError: if a matched record lacks one of the mandatory properties
            (for example ``STATUS_TEXT`` or ``COMNAME``).
    """
    import requests
    import xmltodict
    from datetime import datetime
    from bis import bis

    # These properties in TESS data often contain single quotes or other
    # characters that need to be escaped in order for the resulting data to be
    # inserted into databases like PostgreSQL.
    keysToClean = ["COMNAME", "INVNAME"]
    listingStatusKeys = [
        "STATUS_TEXT", "LISTING_DATE", "POP_ABBREV", "POP_DESC"
    ]

    def buildListingStatus(record):
        """Return the listing-status entry for one SPECIES_DETAIL record."""
        status = {"STATUS": record["STATUS_TEXT"]}
        # If a species is not actually listed, there will not be a listing date.
        if "LISTING_DATE" in record:
            status["LISTING_DATE"] = record["LISTING_DATE"]
        # There are cases where population description information is missing
        # from TESS records.
        for popKey in ("POP_DESC", "POP_ABBREV"):
            if popKey in record:
                status[popKey] = bis.stringCleaning(record[popKey])
        return status

    tessData = {}
    tessData["cacheDate"] = datetime.utcnow().isoformat()
    tessData["result"] = False

    # Query the TESS XQuery service.
    tessXML = requests.get(queryurl).text

    # Build an unordered dict from the TESS XML response (we don't care about
    # ordering for our purposes here).
    tessDict = xmltodict.parse(tessXML, dict_constructor=dict)

    # No "results" element, or an empty one, means nothing was found.
    results = tessDict.get("results")
    if results is None:
        return tessData

    speciesDetail = results["SPECIES_DETAIL"]

    if isinstance(speciesDetail, list):
        # More than one listing designation: species-level properties come
        # from the first record, listing statuses from every record.
        first = speciesDetail[0]
        tessData["result"] = True
        for key in ("ENTITY_ID", "SPCODE", "VIPCODE", "DPS", "COUNTRY"):
            tessData[key] = first[key]
        tessData["INVNAME"] = bis.stringCleaning(first["INVNAME"])
        tessData["SCINAME"] = first["SCINAME"]
        tessData["COMNAME"] = bis.stringCleaning(first["COMNAME"])
        # Refuge occurrence is optional; skip it when it is absent or is not
        # something the cleaner can handle (e.g. an empty element parsed as
        # None).  Only these expected failures are swallowed -- the previous
        # bare ``except`` also hid KeyboardInterrupt and genuine bugs.
        try:
            tessData["REFUGE_OCCURRENCE"] = bis.stringCleaning(
                first["REFUGE_OCCURRENCE"])
        except (KeyError, TypeError, AttributeError):
            pass
        tessData["FAMILY"] = first["FAMILY"]
        tessData["TSN"] = first["TSN"]
        tessData["listingStatus"] = [
            buildListingStatus(record) for record in speciesDetail
        ]

    elif isinstance(speciesDetail, dict):
        # Single listing status: clean the problematic strings, build the one
        # status entry, then carry every remaining property over as-is.
        tessData["result"] = True
        for key in keysToClean:
            tessData[key] = bis.stringCleaning(speciesDetail[key])
        tessData["listingStatus"] = [buildListingStatus(speciesDetail)]

        alreadyHandled = set(keysToClean) | set(listingStatusKeys)
        tessData.update({
            key: value
            for key, value in speciesDetail.items()
            if key not in alreadyHandled
        })

    return tessData