Python stringCleaning Examples, bis.bis.stringCleaning Python Examples

Example #1

0

Show file

def gapToTIR_flat(sbItem):
    """Build a flat TIR registration dict from a ScienceBase GAP species item.

    Args:
        sbItem: dict with a "tags" list (entries carrying "scheme" and
            "name") and an "identifiers" list (entries carrying "type" and
            "key").

    Returns:
        dict holding the original item under "sbdoc", fixed registration
        properties, the scientific/common name found in the tags, and one
        property per identifier keyed by the identifier type. When a
        "GAP_SpeciesCode" identifier is present, "taxonomicgroup" is derived
        from the first character of its key.

    Raises:
        KeyError: if the first character of a GAP species code is not one of
            the known group prefixes (m, b, a, r).
    """
    from datetime import datetime
    from bis import bis

    # First character of a GAP species code -> taxonomic group name
    groupByCodePrefix = {
        "m": "mammals",
        "b": "birds",
        "a": "amphibians",
        "r": "reptiles",
    }

    scientificNameScheme = "https://www.sciencebase.gov/vocab/bis/tir/scientificname"
    commonNameScheme = "https://www.sciencebase.gov/vocab/bis/tir/commonname"

    registration = {
        "sbdoc": sbItem,
        "source": "GAP Species",
        "registrationDate": datetime.utcnow().isoformat(),
        "followTaxonomy": False,
        "taxonomicLookupProperty": "tsn",
    }

    # Only the common name goes through bis.stringCleaning; the scientific
    # name is stored exactly as tagged.
    for tag in sbItem["tags"]:
        scheme = tag["scheme"]
        if scheme == scientificNameScheme:
            registration["scientificname"] = tag["name"]
        elif scheme == commonNameScheme:
            registration["commonname"] = bis.stringCleaning(tag["name"])

    # Every identifier becomes a top-level property named after its type
    for identifier in sbItem["identifiers"]:
        idKey = identifier["key"]
        idType = identifier["type"]
        registration[idType] = idKey
        if idType == "GAP_SpeciesCode":
            registration["taxonomicgroup"] = groupByCodePrefix[idKey[:1]]

    return registration

Example #2

0

Show file

def getTESSSearchURL(queryType, criteria):
    """Return the TESS XQuery URL matching SPECIES_DETAIL[queryType=criteria].

    Args:
        queryType: Name of the SPECIES_DETAIL element to match on.
        criteria: Value to match. Used as given when queryType is "TSN";
            otherwise passed through bis.stringCleaning and wrapped in
            double quotes.

    Returns:
        The query URL as a string.
    """
    from bis import bis

    if queryType == "TSN":
        searchTerm = criteria
    else:
        searchTerm = '"' + bis.stringCleaning(criteria) + '"'

    urlParts = (
        "https://ecos.fws.gov/ecp0/TessQuery?request=query&xquery=/SPECIES_DETAIL[",
        queryType,
        "=",
        searchTerm,
        "]",
    )
    return "".join(urlParts)

Example #3

0

Show file

File: bison.py Project: blohre-usgs/bis

def getBISONSearchURL(queryType, criteria):
    """Return a BISON search API URL for a TSN or a scientific name.

    Args:
        queryType: "TSN" selects a search by TSN; any other value selects a
            scientific-name search.
        criteria: The TSN (converted with str()) or the scientific name
            (passed through bis.stringCleaning).

    Returns:
        The search URL as a string, limited to one result (count=1).
    """
    from bis import bis

    searchEndpoint = "https://bison.usgs.gov/api/search.json?count=1&"

    if queryType == "TSN":
        return searchEndpoint + "tsn=" + str(criteria)

    cleanedName = bis.stringCleaning(criteria)
    return searchEndpoint + "type=scientific_name&species=" + cleanedName

Example #4

0

Show file

def packageITISJSON(itisDoc):
    """Package an ITIS document into a trimmed structure for caching.

    Args:
        itisDoc: An ITIS document dict, or an int. An int is not treated as a
            document (presumably a status/sentinel value -- confirm with
            callers). A dict is modified in place: unwanted keys and the raw
            hierarchy/vernacular strings are removed and replaced with the
            cleaned structures described below.

    Returns:
        dict with "cacheDate" and, when itisDoc is not an int, the remaining
        ITIS properties plus "taxonomy" (list of {"rank", "name"}),
        "hierarchy" (list of names) and, if vernacular names were present,
        "commonnames" (list of {"name", "language"}).
    """
    from datetime import datetime
    from bis import bis

    itisData = {"cacheDate": datetime.utcnow().isoformat()}

    if type(itisDoc) is int:
        return itisData

    # Drop the parts of the ITIS doc that we don't want/need to cache
    unwantedKeys = (
        "_version_", "credibilityRating", "expert", "geographicDivision",
        "hierarchicalSort", "hierarchyTSN", "jurisdiction", "publication",
        "rankID", "otherSource", "taxonAuthor", "comment",
    )
    for unwanted in unwantedKeys:
        itisDoc.pop(unwanted, None)

    # Make a clean structure of the taxonomic hierarchy: take everything
    # after the first ":$", drop the final character, and split the
    # "$"-separated "rank:name" pairs.
    taxonomy = itisDoc["taxonomy"] = []
    rankedHierarchy = itisDoc['hierarchySoFarWRanks'][0]
    pairsStart = rankedHierarchy.find(':$') + 2
    for rankEntry in rankedHierarchy[pairsStart:-1].split("$"):
        rankParts = rankEntry.split(":")
        taxonomy.append({"rank": rankParts[0], "name": rankParts[1]})
    itisDoc.pop("hierarchySoFarWRanks", None)

    # Make a clean, usable list of the hierarchy so far for display or
    # listing: the text after the first ":" minus its first and last
    # characters, split on "$".
    plainHierarchy = itisDoc["hierarchySoFar"][0].split(":")[1]
    itisDoc["hierarchy"] = plainHierarchy[1:-1].split("$")
    itisDoc.pop("hierarchySoFar", None)

    # Make a clean structure of common names; each vernacular entry is a
    # "$"-separated string whose second field is the name and third field
    # the language.
    if "vernacular" in itisDoc:
        commonNames = itisDoc["commonnames"] = []
        for vernacularEntry in itisDoc['vernacular']:
            fields = vernacularEntry.split('$')
            commonNames.append({
                "name": bis.stringCleaning(fields[1]),
                "language": fields[2],
            })
        itisDoc.pop("vernacular", None)

    # Add the trimmed ITIS doc to the ITIS data structure and return
    itisData.update(itisDoc)
    return itisData

Example #5

0

Show file

File: worms.py Project: aulenbac/bis

def packageWoRMSJSON(matchMethod, matchString, wormsDoc):
    """Package a WoRMS document and its match details for caching.

    Args:
        matchMethod: Stored unchanged under "MatchMethod".
        matchString: Stored under "MatchString" after bis.stringCleaning.
        wormsDoc: A WoRMS document dict, or an int. An int is not treated as
            a document (presumably a status/sentinel value -- confirm with
            callers). A dict is modified in place: the keys that are not
            cached are removed from it.

    Returns:
        dict with "cacheDate", "MatchMethod", "MatchString" and, when
        wormsDoc is not an int, the remaining WoRMS properties.
    """
    from datetime import datetime
    from bis import bis

    wormsData = {
        "cacheDate": datetime.utcnow().isoformat(),
        "MatchMethod": matchMethod,
        "MatchString": bis.stringCleaning(matchString),
    }

    if type(wormsDoc) is int:
        return wormsData

    # Remove WoRMS properties that we don't want/need to cache
    for unwanted in ("authority", "citation", "valid_authority", "url"):
        wormsDoc.pop(unwanted, None)

    wormsData.update(wormsDoc)
    return wormsData

Example #6

0

Show file

File: sgcn.py Project: aulenbac/bis

def getSGCNStatesByYear(baseURL, scientificname):
    """Return the SGCN states listing a species, grouped by SGCN year.

    Args:
        baseURL: URL of the SQL query API; the query is appended to it as
            "&q=<sql>".
        scientificname: Submitted scientific name to look up. It is passed
            through bis.stringCleaning before being placed in the query.

    Returns:
        list of the "properties" dict of every feature in the response (the
        query selects sgcn_year and a comma-separated "states" string).
        An empty list is returned when the response has no features;
        previously that case raised UnboundLocalError because the result
        list was only created inside the non-empty branch.
    """
    import requests
    from bis import bis

    # NOTE(review): the SQL is built by string concatenation; safety of the
    # quoted value depends entirely on what bis.stringCleaning does -- confirm.
    q = "SELECT sgcn_year, array_to_string(array_agg(sgcn_state), ',') states FROM sgcn.sgcn WHERE scientificname_submitted = '" + bis.stringCleaning(
        scientificname) + "' GROUP BY sgcn_year"
    r = requests.get(baseURL + "&q=" + q).json()

    return [feature["properties"] for feature in r["features"]]

Example #7

0

Show file

File: sgcn.py Project: aulenbac/bis

def getSGCNTaxonomicGroup(baseURL, scientificname):
    """Return the earliest non-empty taxonomic group submitted for a species.

    Args:
        baseURL: URL of the SQL query API; the query is appended to it as
            "&q=<sql>".
        scientificname: Submitted scientific name to look up. It is passed
            through bis.stringCleaning before being placed in the query.

    Returns:
        The "taxonomicgroup_submitted" value of the first feature in the
        response, or None when the response has no features.
    """
    import requests
    from bis import bis

    cleanedName = bis.stringCleaning(scientificname)
    sql = (
        "SELECT taxonomicgroup_submitted FROM sgcn.sgcn WHERE scientificname_submitted = '"
        + cleanedName
        + "' AND taxonomicgroup_submitted <> '' ORDER BY dateinserted ASC LIMIT 1"
    )
    response = requests.get(baseURL + "&q=" + sql).json()

    features = response["features"]
    if len(features) == 0:
        return None

    # The query is ordered by dateinserted ascending and limited to one row
    return features[0]["properties"]["taxonomicgroup_submitted"]

Example #8

0

Show file

                thisRecord["itis"]["MatchMethod"] = "Fuzzy Match"

        thisRecord["worms"] = json.loads(
            recordToSearch["features"][0]["properties"]["worms"])
        thisRecord["sgcn"] = json.loads(
            recordToSearch["features"][0]["properties"]["sgcn"])

        tirCommon = {}
        tirCommon["cachedate"] = datetime.utcnow().isoformat()

        tirCommon["tirid"] = recordToSearch["features"][0]["properties"]["id"]
        tirCommon["tirapi"] = thisRun[
            "readAPI"] + "?q=SELECT * FROM tir.tir WHERE id=" + str(
                tirCommon["tirid"])

        tirCommon["registeredname"] = bis.stringCleaning(
            thisRecord["registration"]["scientificname"])

        tirCommon["scientificname"] = tirCommon["registeredname"]
        tirCommon["commonname"] = None
        tirCommon["taxonomicgroup"] = "Other"
        tirCommon["taxonomicrank"] = "Unknown Taxonomic Rank"
        tirCommon["matchmethod"] = "Not Matched"
        tirCommon[
            "acceptedauthorityapi"] = "Not Matched to Taxonomic Authority"
        tirCommon[
            "acceptedauthorityurl"] = "Not Matched to Taxonomic Authority"

        if thisRecord["itis"]["MatchMethod"] != "Not Matched":
            tirCommon["scientificname"] = thisRecord["itis"]["nameWInd"]
            tirCommon["matchmethod"] = thisRecord["itis"]["MatchMethod"]
            tirCommon["taxonomicrank"] = thisRecord["itis"]["rank"]

Example #9

0

Show file

            tirRecord["properties"]["registration"])
        thisRecord["itis"] = json.loads(tirRecord["properties"]["itis"])
        thisRecord["worms"] = json.loads(tirRecord["properties"]["worms"])
        if tirRecord["properties"]["sgcn"] is not None:
            thisRecord["sgcn"] = json.loads(tirRecord["properties"]["sgcn"])
        _source = thisRecord["registration"]["source"]

        tirCommon = {}
        tirCommon["commonname"] = None
        tirCommon["authorityid"] = None
        tirCommon["rank"] = None
        tirCommon["matchmethod"] = None
        tirCommon["taxonomicgroup"] = None
        tirCommon["cachedate"] = datetime.utcnow().isoformat()

        tirCommon["scientificname"] = bis.stringCleaning(
            thisRecord["registration"]["scientificname"])
        tirCommon["source"] = thisRecord["registration"]["source"]
        tirCommon["matchmethod"] = "Not Matched"
        tirCommon["authorityid"] = "Not Matched to Taxonomic Authority"
        tirCommon["rank"] = "Unknown Taxonomic Rank"

        if thisRecord["itis"]["MatchMethod"] != "Not Matched":
            tirCommon["scientificname"] = thisRecord["itis"]["nameWInd"]
            tirCommon["matchmethod"] = thisRecord["itis"]["MatchMethod"]
            tirCommon[
                "authorityid"] = "http://services.itis.gov/?q=tsn:" + str(
                    thisRecord["itis"]["tsn"])
            tirCommon["rank"] = thisRecord["itis"]["rank"]
        elif thisRecord["worms"]["MatchMethod"] != "Not Matched":
            tirCommon["scientificname"] = thisRecord["worms"]["valid_name"]
            tirCommon["matchmethod"] = thisRecord["worms"]["MatchMethod"]

Example #10

0

Show file

                thisRecord["scientificname_submitted"] = ""
            else:
                thisRecord["scientificname_submitted"] = row["scientific name"]

            if index in duplicateNamesList:
                duplicateCount = duplicateCount + 1
                if thisRecord["scientificname_submitted"] == "":
                    thisRecord["scientificname_submitted"] = thisRecord[
                        "sgcn_state"] + " unnamed species " + str(
                            duplicateCount)
                else:
                    thisRecord["scientificname_submitted"] = thisRecord[
                        "scientificname_submitted"] + " " + str(duplicateCount)

            # Do basic cleaning on the name string in order to get it into the database
            thisRecord["scientificname_submitted"] = bis.stringCleaning(
                thisRecord["scientificname_submitted"])

            if type(row['common name']) is float:
                thisRecord["commonname_submitted"] = ""
            else:
                thisRecord["commonname_submitted"] = bis.stringCleaning(
                    row['common name'])

            thisRecord["taxonomicgroup_submitted"] = ""
            if 'taxonomy group' in stateData.columns:
                thisRecord["taxonomicgroup_submitted"] = bis.stringCleaning(
                    row['taxonomy group'])
            elif 'taxonomic category' in stateData.columns:
                thisRecord["taxonomicgroup_submitted"] = bis.stringCleaning(
                    row['taxonomic category'])

Example #11

0

Show file

def queryTESS(queryType=None, criteria=None):
    """Query the TESS XQuery service and package the result for caching.

    Args:
        queryType: Name of the SPECIES_DETAIL element to match on
            (e.g. "TSN").
        criteria: Value to match. It is wrapped in double quotes in the
            XQuery unless queryType is "TSN"; a non-string TSN is converted
            with str() when the URL is built.

    Returns:
        dict with "dateCached", "queryType", "criteria" (as passed in) and
        "result". "result" is True only when the response contains at least
        one SPECIES_DETAIL record. When there are several records, a fixed
        set of species properties is copied from the first one; when there
        is a single record, all of its non-listing properties are copied.
        In both cases "listingStatus" holds one entry per record.

    No request is made when queryType or criteria is None. Optional
    properties (REFUGE_OCCURRENCE, LISTING_DATE, POP_DESC, POP_ABBREV) are
    skipped when absent instead of raising KeyError.
    """
    import requests
    import xmltodict
    from datetime import datetime
    from bis import bis

    # These properties in TESS data often contain single quotes or other characters that need to be escaped in order for the resulting data to be inserted into databases like PostgreSQL
    keysToClean = ["COMNAME", "INVNAME"]

    listingStatusKeys = [
        "STATUS_TEXT", "LISTING_DATE", "POP_ABBREV", "POP_DESC"
    ]

    def buildListingStatus(speciesDetail):
        # Build one listing status record from a SPECIES_DETAIL dict
        thisStatus = {"STATUS": speciesDetail["STATUS_TEXT"]}
        # If a species is not actually listed, there will not be a listing date
        if "LISTING_DATE" in speciesDetail:
            thisStatus["LISTING_DATE"] = speciesDetail["LISTING_DATE"]
        # There are cases where population description information is missing from TESS records
        for popKey in ("POP_DESC", "POP_ABBREV"):
            if popKey in speciesDetail:
                thisStatus[popKey] = bis.stringCleaning(speciesDetail[popKey])
        return thisStatus

    tessData = {
        "dateCached": datetime.utcnow().isoformat(),
        "queryType": queryType,
        "criteria": criteria,
        "result": False,
    }

    if queryType is None or criteria is None:
        return tessData

    # The XQuery service from TESS wants string values in quotes
    if queryType != "TSN":
        criteria = '"' + criteria + '"'

    # Query the TESS XQuery service using queryType and criteria arguments
    queryURL = "https://ecos.fws.gov/ecp0/TessQuery?request=query&xquery=/SPECIES_DETAIL[" + queryType + "=" + str(criteria) + "]"
    tessXML = requests.get(queryURL).text

    # Build an unordered dict from the TESS XML response (we don't care about ordering for our purposes here)
    tessDict = xmltodict.parse(tessXML, dict_constructor=dict)

    # A response without a populated "results" element means no match
    results = tessDict.get("results")
    if not isinstance(results, dict):
        return tessData
    speciesDetails = results.get("SPECIES_DETAIL")

    # Handle cases where there is more than one listing designation for a species
    if isinstance(speciesDetails, list) and speciesDetails:
        tessData["result"] = True

        # Species-level properties are taken from the first record only
        firstDetail = speciesDetails[0]
        speciesKeys = (
            "ENTITY_ID", "SPCODE", "VIPCODE", "DPS", "COUNTRY", "INVNAME",
            "SCINAME", "COMNAME", "REFUGE_OCCURRENCE", "FAMILY", "TSN",
        )
        for key in speciesKeys:
            # REFUGE_OCCURRENCE is not present on every record
            if key == "REFUGE_OCCURRENCE" and key not in firstDetail:
                continue
            value = firstDetail[key]
            tessData[key] = bis.stringCleaning(value) if key in keysToClean else value

        tessData["listingStatus"] = [
            buildListingStatus(speciesDetail)
            for speciesDetail in speciesDetails
        ]

    # Handle cases where there is only a single listing status for a species by cleaning/popping a few keys and appending the rest of the result dict
    elif isinstance(speciesDetails, dict):
        tessData["result"] = True

        # Clean up the problematic string properties
        for key in keysToClean:
            if key in speciesDetails:
                tessData[key] = bis.stringCleaning(speciesDetails.pop(key))

        # Build the single listing status record for this species
        tessData["listingStatus"] = [buildListingStatus(speciesDetails)]

        # Get rid of listing status information from the original dict
        for key in listingStatusKeys:
            speciesDetails.pop(key, None)

        # Put the remaining properties into the record for this species
        tessData.update(speciesDetails)

    return tessData

Example #12

0

Show file

def tessQuery(queryurl):
    """Fetch a TESS XQuery URL and package the species result for caching.

    Args:
        queryurl: Complete TESS XQuery URL to request.

    Returns:
        dict with "cacheDate" and "result". "result" is True only when the
        response contains at least one SPECIES_DETAIL record. When there are
        several records, a fixed set of species properties is copied from
        the first one; when there is a single record, all of its non-listing
        properties are copied. In both cases "listingStatus" holds one entry
        per record. Optional properties (REFUGE_OCCURRENCE, LISTING_DATE,
        POP_DESC, POP_ABBREV) are skipped when absent.
    """
    import requests
    import xmltodict
    from datetime import datetime
    from bis import bis

    # These properties in TESS data often contain single quotes or other characters that need to be escaped in order for the resulting data to be inserted into databases like PostgreSQL
    keysToClean = ["COMNAME", "INVNAME"]

    listingStatusKeys = [
        "STATUS_TEXT", "LISTING_DATE", "POP_ABBREV", "POP_DESC"
    ]

    def buildListingStatus(speciesDetail):
        # Build one listing status record from a SPECIES_DETAIL dict
        thisStatus = {"STATUS": speciesDetail["STATUS_TEXT"]}
        # If a species is not actually listed, there will not be a listing date
        if "LISTING_DATE" in speciesDetail:
            thisStatus["LISTING_DATE"] = speciesDetail["LISTING_DATE"]
        # There are cases where population description information is missing from TESS records
        for popKey in ("POP_DESC", "POP_ABBREV"):
            if popKey in speciesDetail:
                thisStatus[popKey] = bis.stringCleaning(speciesDetail[popKey])
        return thisStatus

    tessData = {
        "cacheDate": datetime.utcnow().isoformat(),
        "result": False,
    }

    # Query the TESS XQuery service
    tessXML = requests.get(queryurl).text

    # Build an unordered dict from the TESS XML response (we don't care about ordering for our purposes here)
    tessDict = xmltodict.parse(tessXML, dict_constructor=dict)

    # A response without a populated "results" element means no match
    results = tessDict.get("results")
    if not isinstance(results, dict):
        return tessData
    speciesDetails = results.get("SPECIES_DETAIL")

    # Handle cases where there is more than one listing designation for a species
    if isinstance(speciesDetails, list) and speciesDetails:
        tessData["result"] = True

        # Species-level properties are taken from the first record only
        firstDetail = speciesDetails[0]
        tessData["ENTITY_ID"] = firstDetail["ENTITY_ID"]
        tessData["SPCODE"] = firstDetail["SPCODE"]
        tessData["VIPCODE"] = firstDetail["VIPCODE"]
        tessData["DPS"] = firstDetail["DPS"]
        tessData["COUNTRY"] = firstDetail["COUNTRY"]
        tessData["INVNAME"] = bis.stringCleaning(firstDetail["INVNAME"])
        tessData["SCINAME"] = firstDetail["SCINAME"]
        tessData["COMNAME"] = bis.stringCleaning(firstDetail["COMNAME"])
        try:
            tessData["REFUGE_OCCURRENCE"] = bis.stringCleaning(
                firstDetail["REFUGE_OCCURRENCE"])
        except Exception:
            # Best effort: a missing or uncleanable value is simply left out.
            # Unlike a bare "except:", this no longer swallows
            # KeyboardInterrupt or SystemExit.
            pass
        tessData["FAMILY"] = firstDetail["FAMILY"]
        tessData["TSN"] = firstDetail["TSN"]

        tessData["listingStatus"] = [
            buildListingStatus(speciesDetail)
            for speciesDetail in speciesDetails
        ]

    # Handle cases where there is only a single listing status for a species by cleaning/popping a few keys and appending the rest of the result dict
    elif isinstance(speciesDetails, dict):
        tessData["result"] = True

        # Clean up the problematic string properties
        for key in keysToClean:
            if key in speciesDetails:
                tessData[key] = bis.stringCleaning(speciesDetails.pop(key))

        # Build the single listing status record for this species
        tessData["listingStatus"] = [buildListingStatus(speciesDetails)]

        # Get rid of listing status information from the original dict
        for key in listingStatusKeys:
            speciesDetails.pop(key, None)

        # Put the remaining properties into the record for this species
        tessData.update(speciesDetails)

    return tessData