Example #1
0
# coding: utf-8

import requests, json
from IPython.display import display
from datetime import datetime, timedelta
import pandas as pd
from bis2 import gc2
from bis import bis
from bis import sgcn

# Set up the actions/targets for this particular instance
thisRun = {}
thisRun["instance"] = "DataDistillery"
thisRun["db"] = "BCB"
thisRun["readAPI"] = gc2.sqlAPI(thisRun["instance"], thisRun["db"], False)
thisRun["writeAPI"] = gc2.sqlAPI(thisRun["instance"], thisRun["db"], True)
thisRun["commitToDB"] = True
thisRun["totalRecordsToProcess"] = 1
thisRun["totalRecordsProcessed"] = 0
thisRun["dateCheck"] = None

# Retrieve information from stored files on the SGCN base repository item
sb_sgcnCollectionItem = requests.get(
    "https://www.sciencebase.gov/catalog/item/56d720ece4b015c306f442d5?format=json&fields=files"
).json()

for file in sb_sgcnCollectionItem["files"]:
    if file["title"] == "Configuration:Taxonomic Group Mappings":
        tgMappings = pd.read_table(file["url"], sep=",", encoding="utf-8")
    elif file["title"] == "Original 2005 SWAP National List for reference":
        swap2005 = pd.read_table(file["url"])
Example #2
0
    elif file["title"] == "Original 2005 SWAP National List for reference":
        swap2005 = pd.read_table(file["url"])

tgDict = {}
for index, row in tgMappings.iterrows():
    providedName = str(row["ProvidedName"])
    preferredName = str(row["PreferredName"])
    tgDict[providedName] = preferredName

# In[4]:

# Set up the actions/targets for this particular instance
thisRun = {}
thisRun["instance"] = "DataDistillery"
thisRun["db"] = "BCB"
thisRun["baseURL"] = gc2.sqlAPI(thisRun["instance"], thisRun["db"])
thisRun["commitToDB"] = True
thisRun["totalRecordsToProcess"] = 5000
thisRun["totalRecordsProcessed"] = 0

numberWithoutTIRData = 1

while numberWithoutTIRData == 1 and thisRun["totalRecordsProcessed"] < thisRun[
        "totalRecordsToProcess"]:

    q_recordToSearch = "SELECT id, registration->>'scientificname' AS name_submitted, itis->>'nameWInd' AS name_itis, worms->>'valid_name' AS name_worms FROM tir.tir WHERE registration->>'source' = 'SGCN' AND sgcn->>'dateCached' IS NULL LIMIT 1"
    recordToSearch = requests.get(thisRun["baseURL"] + "&q=" +
                                  q_recordToSearch).json()

    numberWithoutTIRData = len(recordToSearch["features"])
Example #3
0
# Note that this entire script needs to run in sequence. Because we are coming into this from the standpoint of external config files, it is much more efficient to use targeted SQL statements to update a whole set of records at a time in the TIR as opposed to looping every TIR record and pulling information from the config files.

# In[12]:

import requests
from IPython.display import display
import pandas as pd
from bis2 import gc2

# In[16]:

# Set up the actions/targets for this particular instance
thisRun = {}
thisRun["instance"] = "DataDistillery"
thisRun["db"] = "BCB"
thisRun["baseURL"] = gc2.sqlAPI(thisRun["instance"], thisRun["db"])
thisRun["commitToDB"] = True
thisRun["resetSGCN"] = False

# In[17]:

# Reset SGCN annotation in TIR
if thisRun["resetSGCN"]:
    print(
        requests.get(
            gc2.sqlAPI("DataDistillery", "BCB") +
            "&q=UPDATE tir.tir SET sgcn = NULL").json())

# In[18]:

# Retrieve information from stored files on the SGCN base repository item
Example #4
0
# In[1]:

import requests, re, json
from IPython.display import display
from bis import worms
from bis import bis
from bis import tir
from bis2 import gc2

# In[8]:

# Set up the actions/targets for this particular instance
thisRun = {}
thisRun["instance"] = "DataDistillery"
thisRun["db"] = "BCB"
thisRun["baseURL"] = gc2.sqlAPI(thisRun["instance"], thisRun["db"])
thisRun["commitToDB"] = True
thisRun["totalRecordsToProcess"] = 700
thisRun["totalRecordsProcessed"] = 0
thisRun[
    "wormsNameService"] = "http://www.marinespecies.org/rest/AphiaRecordsByName/"
thisRun[
    "wormsIDService"] = "http://www.marinespecies.org/rest/AphiaRecordByAphiaID/"

numberWithoutTIRData = 1

while numberWithoutTIRData == 1 and thisRun["totalRecordsProcessed"] < thisRun[
        "totalRecordsToProcess"]:
    q_recordToSearch = "SELECT id,         registration->>'source' AS source,         registration->>'followTaxonomy' AS followtaxonomy,         registration->>'taxonomicLookupProperty' AS taxonomiclookupproperty,         registration->>'scientificname' AS scientificname,         itis->>'nameWInd' AS nameWInd,         itis->>'nameWOInd' AS nameWOInd         FROM tir.tir         WHERE worms IS NULL         LIMIT 1"
    recordToSearch = requests.get(
        gc2.sqlAPI("DataDistillery", "BCB") + "&q=" + q_recordToSearch).json()
Example #5
0
from bis import tir
from bis import gap

# In[ ]:

sb = pysb.SbSession()
username = input("Username: "******"instance"] = "DataDistillery"
thisRun["db"] = "BCB"
thisRun["baseURL"] = gc2.sqlAPI(thisRun["instance"], thisRun["db"])

_habitatMapCollectionID = "527d0a83e4b0850ea0518326"
_gapSpecies = sb.get_child_ids(_habitatMapCollectionID)

for item in _gapSpecies:
    print(item)
    checkTIR = requests.get(
        thisRun["baseURL"] +
        "&q=SELECT id FROM tir.tir WHERE registration->>'id' = '" + item +
        "'").json()
    if len(checkTIR["features"]) == 0:
        gapSpecies = gap.gapToTIR(
            sb.get_item(item, {'fields': 'identifiers,tags'}))
        display(gapSpecies)
        print(
Example #6
0
import requests
import json
from datetime import datetime
from IPython.display import display
from bis import tir
from bis import bis
from bis2 import gc2

# In[2]:

# Set up the actions/targets for this particular instance
thisRun = {}
thisRun["instance"] = "DataDistillery"
thisRun["db"] = "BCB"
thisRun["baseURL"] = gc2.sqlAPI(thisRun["instance"], thisRun["db"])
thisRun["commitToDB"] = True
thisRun["totalRecordsToProcess"] = 1000
thisRun["totalRecordsProcessed"] = 0

numberWithoutTIRData = 1

while numberWithoutTIRData == 1 and thisRun["totalRecordsProcessed"] < thisRun[
        "totalRecordsToProcess"]:

    q_recordToSearch = "SELECT scientificname_submitted scientificname         FROM sgcn.sgcn         WHERE scientificname_submitted NOT IN         (SELECT registration->>'scientificname' AS scientificname FROM tir.tir WHERE registration->>'source' = 'SGCN')         GROUP BY scientificname_submitted         LIMIT 1"
    recordToSearch = requests.get(thisRun["baseURL"] + "&q=" +
                                  q_recordToSearch).json()

    numberWithoutTIRData = len(recordToSearch["features"])
# In[42]:

import requests,datetime
from IPython.display import display
from bis import tir
from bis2 import gc2


# ### Get data to process
# 
# This script gathers all of the unique taxonomic names (scientificname_submitted) from the SGCN data and registers them with the Taxonomic Information Registry. The query checks to make sure the taxon name is not already registered in the TIR.

# In[43]:

q_sgcn = "SELECT scientificname_submitted scientificname,     array_to_string(array_agg(DISTINCT CASE WHEN commonname_submitted <> '' THEN commonname_submitted ELSE NULL END),',') commonnames,     array_to_string(array_agg(DISTINCT CASE WHEN sgcn_state <> '' THEN sgcn_state ELSE NULL END),',') sgcnstates,     array_to_string(array_agg(DISTINCT CASE WHEN taxonomicgroup_submitted <> '' THEN taxonomicgroup_submitted ELSE NULL END),',') taxonomicgroups     FROM sgcn.sgcn     WHERE scientificname_submitted NOT IN (        SELECT registration->'scientificname' AS scientificname_submitted         FROM tir.tir         WHERE registration->'Source' = 'SGCN'     )     GROUP BY scientificname_submitted"
r_sgcn = requests.get(gc2.sqlAPI("DataDistillery","BCB")+"&q="+q_sgcn).json()


# ### Iterate over the data and process to TIR
# 
# This block iterates over the unique species returned, packages up the data for the registration, and inserts them into the TIR table. It requires the following:
# 
# * TIR table set up in the appropriate GC2-basede data schema
# * Registration field in the TIR table using the hstore data type
# * gc2 module from the BIS2 package (connection info for the API)
# * tir module from the BIS package (function to insert registration info)

# In[44]:

recordCount = 0