Example #1
def download_GAP_range_CONUS2001v1(gap_id, toDir):
    """
    Downloads the GAP Range CONUS 2001 v1 file and returns the path to the
    unzipped file.  NOTE: the returned path omits the file extension so that
    you can choose csv, shp, or xml when you use the path.
    """
    import sciencebasepy
    import zipfile

    # Connect
    sb = sciencebasepy.SbSession()

    # Search for gap range item in ScienceBase
    gap_id = gap_id[0] + gap_id[1:5].upper() + gap_id[5]
    item_search = '{0}_CONUS_2001v1 Range Map'.format(gap_id)
    items = sb.find_items_by_any_text(item_search)

    # Get a public item.  No need to log in.
    rng = items['items'][0]['id']
    item_json = sb.get_item(rng)
    sb.get_item_files(item_json, toDir)

    # Unzip
    rng_zip = toDir + item_json['files'][0]['name']
    with zipfile.ZipFile(rng_zip, 'r') as zip_ref:
        zip_ref.extractall(toDir)

    # Return path to range file without extension
    return rng_zip.replace('.zip', '')
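
A minimal usage sketch for the function above; the GAP species code and output directory are hypothetical example values, and the directory must already exist and end with a path separator:

# Hypothetical example call (not part of the original snippet)
range_path = download_GAP_range_CONUS2001v1("bAMROx", "C:/Temp/")
print(range_path + ".csv")  # append whichever extension you need (csv, shp, or xml)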
Example #2
def download_GAP_model_CONUS2001v1(gap_id, toDir):
    """
    Gets GAP habitat models as JSONs.  Pulls out summer NE and SE.  
    Returns a list of dictionaries.
    """
    import sciencebasepy
    import json

    # Connect
    sb = sciencebasepy.SbSession()

    # Search for gap range item in ScienceBase
    gap_id = gap_id[0] + gap_id[1:5].upper() + gap_id[5]
    item_search = '{0}_CONUS_HabModel_2001v1.json'.format(gap_id)
    items = sb.find_items_by_any_text(item_search)

    # Get a public item.  No need to log in.
    mod = items['items'][0]['id']
    item_json = sb.get_item(mod)
    sb.get_item_files(item_json, toDir)

    # Read in the downloaded JSON file and pull out the two seasonal models
    with open(toDir + gap_id + "_CONUS_HabModel_2001v1.json") as modelFile:
        models = json.load(modelFile)
    models = [
        models["models"][gap_id + "-s6"], models["models"][gap_id + "-s3"]
    ]

    # Hand-built models are not returned; the function yields None in that case.
    if models[0]['ysnHandModel'] or models[1]['ysnHandModel']:
        print('handmodel')
    else:
        return models
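
A hypothetical call, mirroring the range example above; the species code is illustrative, and the function returns None when either seasonal model is hand-built:

# Hypothetical example call (not part of the original snippet)
models = download_GAP_model_CONUS2001v1("bAMROx", "C:/Temp/")
if models is not None:
    for model in models:
        print(model["ysnHandModel"], sorted(model.keys()))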
Example #3
def stinson2019():
    """Downloads all files from ScienceBase item number "5d67eacae4b0c4f70cf15be3"

    ## Data website ##
    https://www.sciencebase.gov/catalog/item/5d67eacae4b0c4f70cf15be3

    ## Citation ##
    Stinson, K., Rapp, J., Ahmed, S., Lutz, D., Huish, R., Dufour, B., and Morelli, T.L., 2019,
    Sap Quantity at Study Sites in the Northeast: U.S. Geological Survey data release, https://doi.org/10.5066/P9H65YCC.
    """

    import os

    import pandas as pd
    import sciencebasepy

    sb = sciencebasepy.SbSession()
    raw_path = os.path.join("data", "raw", "stinson2019")
    processed_path = os.path.join("data", "processed", "stinson2019")

    if not os.path.exists(raw_path):
        os.makedirs(raw_path)

    if not os.path.exists(processed_path):
        os.makedirs(processed_path)

    item_json = sb.get_item("5d67eacae4b0c4f70cf15be3")
    sb.get_item_files(item_json, raw_path)

    # Convert data csv files to dataframes and pickle
    df = pd.read_csv(os.path.join(raw_path, 'ACERnet_sap_2012_2017_ID.csv'),
                     parse_dates=['Date', 'Year'])
    df.columns = [x.lower().replace('.', '_') for x in list(df.columns)]
    df.to_pickle(os.path.join(processed_path, 'stinson2019_df'))

    locations = pd.read_csv(os.path.join(raw_path, 'ACERnet_LatLon.csv'))
    locations.to_pickle(os.path.join(processed_path, 'stinson2019_locations'))
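
A short follow-up sketch, assuming stinson2019() has already been run; it reads the pickled dataframes back from the processed directory created above:

# Hypothetical example (not part of the original snippet)
import os
import pandas as pd

processed_path = os.path.join("data", "processed", "stinson2019")
df = pd.read_pickle(os.path.join(processed_path, "stinson2019_df"))
locations = pd.read_pickle(os.path.join(processed_path, "stinson2019_locations"))
print(df.head())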
Example #4
from typing import Dict

import sciencebasepy
# Note: Logger is a project-specific logging helper; its import is not shown
# in the original snippet.


def _get_url(params: Dict[str, str]):
    """
    Call Science Base API with the argument params and return list of download URLs
    :param params: Science Base params object
    :return: List of HTTPS download URLs for items on S3
    """

    log = Logger('Download')
    log.info('Science base query: {}'.format(params))

    sb = sciencebasepy.SbSession()
    items = sb.find_items(params)

    log.info('{} Science base item(s) identified.'.format(items['total']))

    urls = []
    while items and 'items' in items:
        for item in items['items']:
            result = sb.get_item(item['id'])
            for weblink in result['webLinks']:
                if weblink['type'] == 'download':
                    urls.append(weblink['uri'])

        # pylint flags sb.next as "not callable"; the warning is a false
        # positive here and can be ignored.
        items = sb.next(items)

    return urls
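
A hypothetical call to the helper above; the query parameters follow the ScienceBase catalog search syntax, and the values shown are illustrative only:

# Hypothetical example (not part of the original snippet)
params = {'q': 'National Hydrography Dataset', 'max': '20'}
urls = _get_url(params)
print('{} download URL(s) found'.format(len(urls)))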
Example #5
    def get_zip_file_and_extract(self):
        """
        Get item JSON, download zipfile, and extract it in current working directory
        :return:
        """
        sb = pysb.SbSession()
        item = sb.get_item(self.item_id)
        zip_file = self.get_zip_file(item)
        download_uri = zip_file["downloadUri"]
        file_size = zip_file["size"]

        if download_uri is not None:
            self.zip_file = self.item_id + zip_file["name"]
            self.download_file(download_uri, file_size)
            self.extract_zip_file()
        else:
            raise Exception("No URI was found for zipfile download")
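
The get_zip_file, download_file, and extract_zip_file methods belong to the surrounding class and are not shown here. A minimal sketch of what get_zip_file might look like, assuming it simply returns the first entry in the item's 'files' list whose name ends with ".zip":

    # Hypothetical helper method (not part of the original snippet)
    def get_zip_file(self, item):
        """Return the first file entry in the item JSON whose name ends with .zip."""
        for item_file in item.get("files", []):
            if item_file.get("name", "").endswith(".zip"):
                return item_file
        raise Exception("No zipfile found in item {}".format(self.item_id))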
Example #6
def download_GAP_range_CONUS2001v1(gap_id, toDir):
    """
    Downloads the GAP Range CONUS 2001 v1 file and returns the path to the
    unzipped file.  NOTE: the returned path omits the file extension so that
    you can choose csv, shp, or xml when you use the path.
    """
    import sciencebasepy
    import zipfile
    import requests
    from io import BytesIO

    # Connect
    sb = sciencebasepy.SbSession()

    # Search for gap range item in ScienceBase
    gap_id = gap_id[0] + gap_id[1:5].upper() + gap_id[5]
    item_search = '{0}_CONUS_2001v1 Range Map'.format(gap_id)
    items = sb.find_items_by_any_text(item_search)

    # Get a public item.  No need to log in.
    rngID = items['items'][0]['id']
    item_json = sb.get_item(rngID)
    flst = item_json['files']
    zname = '{0}_CONUS_Range_2001v1.zip'.format(gap_id)
    # Use the GetIndex helper (defined outside this snippet; a sketch is given
    # after this example) to find the zip file's position in the item's
    # 'files' list by matching on the 'name' key.
    zip_index = GetIndex(flst, 'name', zname)
    # Equivalent one-liner without the helper:
    #zip_index = next((index for (index, d) in enumerate(flst) if d["name"] == zname), None)
    # Get the URL to the zip file containing the HUC CSV
    rngzipURL = item_json['files'][zip_index]['url']
    r = requests.get(rngzipURL)
    z = zipfile.ZipFile(BytesIO(r.content))

    # Get ONLY the HUC CSV file and extract it to the designated directory
    rngCSV = [y for y in sorted(z.namelist()) if y.endswith('csv')]
    csvFile = z.extract(rngCSV[0], toDir)
    z.close()

    # Return the extracted range CSV
    return csvFile
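
GetIndex is used above but not defined in this snippet. A minimal sketch, consistent with the commented-out one-liner inside the function, might be:

# Hypothetical helper (not part of the original snippet)
def GetIndex(dict_list, key, value):
    """Return the index of the first dict in dict_list whose `key` equals `value`, or None."""
    return next((index for (index, d) in enumerate(dict_list) if d[key] == value), None)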
"""
Change folder name to match XML title
"""
rename_dirs_from_xmls(parentdir)

#%% Create SB page structure
"""
Create SB page structure: nested child pages following directory hierarchy
Inputs: parent directory, landing page ID
This one should overwrite the entire data release (excluding the landing page).
"""
# Check whether logged in.
if not sb.is_logged_in():
    print('Logging back in...')
    try:
        sb = pysb.SbSession(env=None).login(useremail,password)
    except NameError:
        # Fall back to console (prompted) login when no password variable is defined.
        sb = pysb.SbSession(env=None).loginc(useremail)

# If there's no dir_to_id.json file available, we need to create the subpage structure.
if not update_subpages and not os.path.isfile(os.path.join(parentdir,'dir_to_id.json')):
    print("dir_to_id.json file is not in parent directory, so we will perform update_subpages routine.")
    update_subpages = True

if update_subpages:
    dict_DIRtoID = setup_subparents(sb, parentdir, landing_id, imagefile)
    # Save dictionaries
    with open(os.path.join(parentdir,'dir_to_id.json'), 'w') as f:
        json.dump(dict_DIRtoID, f)
else: # Import pre-created dictionaries if all SB pages exist
    with open(os.path.join(parentdir,'dir_to_id.json'), 'r') as f:
        dict_DIRtoID = json.load(f)
Example #8
# =============================================================================
import sciencebasepy as sb
import getpass
import pandas as pd

# =============================================================================
# Parameters
# =============================================================================
page_url = "5ad77f06e4b0e2c2dd25e798"  # ScienceBase item ID of the parent page
username = "******"
password = getpass.getpass()

# =============================================================================
# login and get child ids
# =============================================================================
sb_session = sb.SbSession()
sb_session.login(username, password)

child_ids = sb_session.get_child_ids(page_url)
# =============================================================================
# Change json
# =============================================================================
data = []

for child_id in child_ids:
    try:
        child_json = sb_session.get_item(child_id)
    except Exception:
        print("---> skipping child id {0}".format(child_id))
        continue
Example #9
import pandas as pd
import json
import sciencebasepy
import os
import glob

# Set paths to data
projDir = "P:/Proj6/GAP-WVBA/"
dataDir = projDir + "Data/"
habitatDir = projDir + "Data/habmaps/"
listDir = dataDir + 'Specieslists/WV_AtlasCodes.csv'
resultsCSV = projDir + "Results/elevation_summary.csv"
toDir = "C:/Temp/"
#toDir = "T:/Temp/"
# Connect
sb = sciencebasepy.SbSession()
"""
# Read in the elevation summary CSV as a dataframe and save a copy in the archive.
# Run this block only once, because the species list will be re-read and
# duplicated on each additional run.
inDF = pd.read_csv(resultsCSV, dtype={'GAP_code': 'string', 
                                      'common_name': 'string'})
timestamp = str(datetime.now(tz=None).strftime("%d%B%Y_%I%M%p"))
archiveCSV = projDir + "Results/Archive/elevation_" + timestamp + ".csv"
newDF = inDF.copy(deep=True)
sppList = pd.read_csv(listDir, dtype={'strUC': 'string',
                                      'strCommonName': 'string'}) 
sppList.rename(columns = {'strUC':'GAP_code'}, inplace = True)
sppList.rename(columns = {'strCommonName':'common_name'}, inplace = True)
sppList.drop(['state_name', 'strScientificName_x','intHa', 
              'strScientificName_y', 'N_birds', 'N_points', 'Det_Rate'], 
             axis='columns', index=None, columns=None, 
Example #10
    usage()
    sys.exit(1)
if username is None:
    username, password = get_user_pw_from_file('~/.pw')
    if username is None:
        print('No user specified')
        usage()
        sys.exit(2)
if sbenv is None:
    print('No environment specified')
    usage()
    sys.exit(3)

if istest:
    print('user=' + username + ' env=' + sbenv)
    print('items=' + str(item_ids))
    print('undelete? ' + str(undelete))
    print('children? ' + str(delete_only_children))
    print('batchsize=' + str(batchsize))

if password:
    sb = pysb.SbSession(sbenv).login(username, password)
else:
    sb = pysb.SbSession(sbenv).loginc(username)

# The delete/undelete helpers take the full list of item ids, so they are
# called once rather than once per item.
if undelete:
    undelete_items(item_ids)
else:
    delete_items(item_ids, batchsize, delete_only_children)
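
get_user_pw_from_file is referenced above but not shown. A minimal sketch, assuming a plain-text file with the username on the first line and the password on the second (the actual format is not documented in this snippet):

# Hypothetical helper (not part of the original snippet)
import os

def get_user_pw_from_file(path):
    """Read a username/password pair from a two-line text file; return (None, None) if unavailable."""
    path = os.path.expanduser(path)
    if not os.path.isfile(path):
        return None, None
    with open(path) as f:
        lines = [line.strip() for line in f]
    if len(lines) < 2:
        return None, None
    return lines[0], lines[1]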
Example #11
CONUSArea = 8103534.7  # 12-Digit HUC CONUS total area in km2
nHUCs = 82717.0  # Number of 12-digit HUCS in CONUS
cntLC = 9000763993.0  # Cell count of CONUS landcover excluding 0s
cntLCnoW = 8501572144.0  # Cell count of CONUS landcover excluding 0s and water

# Make an empty master dataframe
dfMaster = pd.DataFrame()
'''
    Connect to ScienceBase to pull down a species list
    This uses the ScienceBase item for species habitat maps
    and searches for a CSV file with species info in it.
    The habitat map item has a unique id (527d0a83e4b0850ea0518326).
    If this changes, the code will need to be re-written.

'''
sb = sciencebasepy.SbSession()
habmapItem = sb.get_item("527d0a83e4b0850ea0518326")
# Make a regular expression variable for the hab map csv file name pattern
fnp = 'ScienceBaseHabMapCSV.+'
# Initialize so the check below works even if no file name matches
dfSppCSV = None
for file in habmapItem["files"]:
    # Search for the file name pattern in the hab map item files dictionary
    fnMatch = re.search(fnp, file['name'])
    if fnMatch is not None:
        try:
            dfSppCSV = pd.read_csv(StringIO(sb.get(file["url"])))
        except Exception:
            print('!! Could not read the habitat map CSV file !!')

# Check to make sure the CSV file was returned
if dfSppCSV is not None:
    print('-' * 55)
Example #12
    def __init__(self):
        self.sbpy = sciencebasepy.SbSession()