def _download_features(feature_ids, path=None, check_modified=False):
    if path is None:
        path = os.path.join(util.get_ulmo_dir(), DEFAULT_FILE_PATH)

    if isinstance(feature_ids, basestring):
        feature_ids = [feature_ids]

    tiles = []
    tile_fmt = '.img'
    for feature_id in feature_ids:
        url = SCIENCEBASE_ITEM_URL % feature_id
        metadata = requests.get(url).json()
        layer = [a for a in list(layer_dict.keys()) if a in metadata['title']][0]
        layer_path = os.path.join(path, layer_dict[layer])
        tile_urls = [
            link['uri'] for link in metadata['webLinks']
            if link['type'] == 'download'
        ]
        tiles.append({
            'feature_id': feature_id,
            'tiles': util.download_tiles(
                layer_path, tile_urls, tile_fmt, check_modified),
        })

    return tiles
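# Illustrative sketch of the structure _download_features returns; the
# feature id and file path below are hypothetical placeholders, not values
# from the ScienceBase service:
#
#     [{'feature_id': '581d2d68e4b08da350d665a5',
#       'tiles': ['/path/to/ulmo/dir/ned13/some_tile.img']}]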
def get_data(county=None, start=None, end=None, as_dataframe=False,
             data_dir=None):
    """Retrieves data.

    Parameters
    ----------
    county : ``None`` or str
        If specified, results will be limited to the county corresponding to
        the given 5-character Texas county FIPS code, i.e. 48???.
    end : ``None`` or date (see :ref:`dates-and-times`)
        Results will be limited to data on or before this date. Default is the
        current date.
    start : ``None`` or date (see :ref:`dates-and-times`)
        Results will be limited to data on or after this date. Default is the
        start of the calendar year for the end date.
    as_dataframe : bool
        If ``False`` (default), a dict with a nested set of dicts will be
        returned with data indexed by 5-character Texas county FIPS code. If
        ``True`` then a pandas.DataFrame object will be returned. The pandas
        dataframe is used internally, so setting this to ``True`` is a little
        bit faster as it skips a serialization step.
    data_dir : ``None`` or directory path
        Directory for holding downloaded data files. If no path is provided
        (default), then a user-specific directory for holding application data
        will be used (the directory will depend on the platform/operating
        system).

    Returns
    -------
    data : dict or pandas.DataFrame
        A dict or pandas.DataFrame representing the data. See the
        ``as_dataframe`` parameter for more.
    """
    if end is None:
        end_date = datetime.date.today()
    else:
        end_date = util.convert_date(end)
    if start is None:
        start_date = datetime.date(end_date.year, 1, 1)
    else:
        start_date = util.convert_date(start)
    if data_dir is None:
        data_dir = os.path.join(util.get_ulmo_dir(), 'twc/kbdi')

    df = pandas.concat([
        _date_dataframe(date, data_dir)
        for date in pandas.period_range(start_date, end_date, freq='D')
    ], ignore_index=True)
    fips_df = _fips_dataframe()
    df = pandas.merge(df, fips_df, left_on='county', right_on='name')
    del df['name']

    if county:
        df = df[df['fips'] == county]

    if as_dataframe:
        return df
    else:
        return _as_data_dict(df)
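# Usage sketch (hedged example): fetch June 2015 KBDI values for a single
# county as a DataFrame. The FIPS code '48453' (Travis County) and the date
# strings are example inputs, not values defined by this module:
#
#     df = get_data(county='48453', start='2015-06-01', end='2015-06-30',
#                   as_dataframe=True)
#     nested = get_data()  # dict of dicts keyed by county FIPS code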
def _get_store_path(path, default_file_name):
    if path is None:
        path = os.path.join(util.get_ulmo_dir(), DEFAULT_FILE_PATH)

    if not os.path.exists(path):
        os.makedirs(path)

    return os.path.join(path, default_file_name)
def get_raster(layer, bbox, path=None, update_cache=False,
               check_modified=False, mosaic=False):
    """Downloads National Elevation Dataset raster tiles that cover the given
    bounding box for the specified data layer.

    Parameters
    ----------
    layer : str
        Dataset layer name. (See get_available_layers for a list.)
    bbox : sequence of float|str
        Bounding box, in geographic coordinates, of the area for which tiles
        are downloaded, in the format (min longitude, min latitude,
        max longitude, max latitude).
    path : ``None`` or path
        If ``None``, the default path will be used.
    update_cache : ``True`` or ``False`` (default)
        If ``False`` and the output file already exists, use it.
    check_modified : ``True`` or ``False`` (default)
        If a tile exists in path, check if a newer file exists online and
        download it if available.
    mosaic : ``True`` or ``False`` (default)
        If ``True``, mosaic and clip downloaded tiles to the extents of the
        bbox provided. Requires the rasterio package and GDAL.

    Returns
    -------
    raster_tiles : geojson FeatureCollection
        Metadata as a FeatureCollection. The local url of the downloaded data
        is in feature['properties']['file'].
    """
    _check_layer(layer)

    raster_tiles = _download_tiles(get_raster_availability(layer, bbox),
                                   path=path, check_modified=check_modified)

    if mosaic:
        if path is None:
            path = os.path.join(util.get_ulmo_dir(), DEFAULT_FILE_PATH)
        util.mkdir_if_doesnt_exist(os.path.join(path, 'by_boundingbox'))
        xmin, ymin, xmax, ymax = [float(n) for n in bbox]
        uid = util.generate_raster_uid(layer, xmin, ymin, xmax, ymax)
        output_path = os.path.join(path, 'by_boundingbox', uid + '.tif')

        if os.path.isfile(output_path) and not update_cache:
            # return a list for consistency with the freshly-mosaicked case
            return [output_path]

        raster_files = [
            tile['properties']['file'] for tile in raster_tiles['features']
        ]
        util.mosaic_and_clip(raster_files, xmin, ymin, xmax, ymax,
                             output_path)
        return [output_path]

    return raster_tiles
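# Usage sketch (hedged example): the layer name and bounding box below are
# assumed illustrative values; call get_available_layers() for the actual
# layer names:
#
#     bbox = (-97.8, 30.2, -97.6, 30.4)  # min lon, min lat, max lon, max lat
#     tiles = get_raster('1 arc-second', bbox)               # FeatureCollection
#     paths = get_raster('1 arc-second', bbox, mosaic=True)  # [path to .tif]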
def _get_client(wsdl_url, suds_cache=("default",), suds_timeout=None,
                user_cache=False):
    """
    Open and re-use (persist) a suds.client.Client instance _suds_client
    throughout the session, to minimize WOF server impact and improve
    performance. _suds_client is global in scope.

    Parameters
    ----------
    wsdl_url : str
        URL of a service's web service definition language (WSDL)
        description. All WaterOneFlow services publish a WSDL description and
        this url is the entry point to the service.
    suds_cache : ``None`` or tuple
        suds client local cache duration for WSDL description and client
        object. Pass a cache duration tuple like ('days', 3) to set a custom
        duration. Duration may be in months, weeks, days, hours, or seconds.
        If unspecified, the suds default (1 day) will be used. Use ``None``
        to turn off caching.
    suds_timeout : int or float
        suds SOAP URL open timeout (seconds). If unspecified, the suds
        default (90 seconds) will be used.
    user_cache : bool
        If False (default), use the system temp location to store cached WSDL
        and other files. Use the default user ulmo directory if True.

    Returns
    -------
    _suds_client : suds Client
        Newly or previously instantiated (reused) suds Client object.
    """
    global _suds_client

    # Handle new or changed client request (create new client)
    if (_suds_client is None or _suds_client.wsdl.url != wsdl_url
            or suds_timeout is not None):
        if user_cache:
            cache_dir = os.path.join(util.get_ulmo_dir(), 'suds')
            util.mkdir_if_doesnt_exist(cache_dir)
            _suds_client = suds.client.Client(
                wsdl_url, cache=ObjectCache(location=cache_dir))
        else:
            _suds_client = suds.client.Client(wsdl_url)

        if suds_cache is None:
            _suds_client.set_options(cache=None)
        else:
            cache = _suds_client.options.cache
            # could add some error catching ...
            if suds_cache[0] == "default":
                cache.setduration(days=1)
            else:
                cache.setduration(**dict([suds_cache]))

        if suds_timeout is not None:
            _suds_client.set_options(timeout=suds_timeout)

    return _suds_client
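# Usage sketch (hedged example): the WSDL URL below is a sample WaterOneFlow
# endpoint assumed for illustration:
#
#     client = _get_client(
#         'http://hydroportal.cuahsi.org/nwisuv/cuahsi_1_1.asmx?WSDL',
#         suds_cache=('days', 3), suds_timeout=120, user_cache=True)
#     # a second call with the same wsdl_url reuses the cached client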
def get_raster(product_key, bbox, fmt=None, path=None, update_cache=False,
               check_modified=False, mosaic=False):
    """Downloads raster tiles that cover the given bounding box for the
    specified dataset.

    Parameters
    ----------
    product_key : str
        Dataset name. (See get_available_datasets for a list.)
    bbox : sequence of float|str
        Bounding box, in geographic coordinates, of the area for which tiles
        are downloaded, in the format (min longitude, min latitude,
        max longitude, max latitude).
    fmt : ``None`` or str
        Available formats vary between datasets. If ``None``, preference will
        be given to geotiff, then img, followed by whatever format is
        available.
    path : ``None`` or path
        If ``None``, the default path will be used.
    update_cache : ``True`` or ``False`` (default)
        If ``False``, tiles will not be re-downloaded if they already exist
        in the path.
    check_modified : ``True`` or ``False`` (default)
        If a tile exists in path, check if a newer file exists online and
        download it if available.
    mosaic : ``True`` or ``False`` (default)
        If ``True``, mosaic and clip downloaded tiles to the extents of the
        bbox provided. Requires the rasterio package and GDAL.

    Returns
    -------
    raster_tiles : geojson FeatureCollection
        Metadata as a FeatureCollection. The local url of the downloaded data
        is in feature['properties']['file'].
    """
    raster_tiles = _download_tiles(
        get_raster_availability(product_key, bbox, fmt), path, check_modified)

    if mosaic:
        if path is None:
            path = os.path.join(util.get_ulmo_dir(), DEFAULT_FILE_PATH)
        util.mkdir_if_doesnt_exist(os.path.join(path, 'by_boundingbox'))
        # unpack and normalize the bounding box coordinates
        xmin, ymin, xmax, ymax = [float(n) for n in bbox]
        uid = util.generate_raster_uid(product_key, xmin, ymin, xmax, ymax)
        output_path = os.path.join(path, 'by_boundingbox', uid + '.tif')

        if os.path.isfile(output_path) and not update_cache:
            # return a list for consistency with the freshly-mosaicked case
            return [output_path]

        raster_files = [
            tile['properties']['file'] for tile in raster_tiles['features']
        ]
        util.mosaic_and_clip(raster_files, xmin, ymin, xmax, ymax,
                             output_path)
        return [output_path]

    return raster_tiles
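# Usage sketch (hedged example): the product key and bounding box are assumed
# illustrative values; call get_available_datasets() for the actual product
# keys:
#
#     bbox = (-97.8, 30.2, -97.6, 30.4)
#     tiles = get_raster('NED 1 arc-second', bbox, fmt='geotiff')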
def _download_tiles(tiles, path=None, check_modified=False):
    if path is None:
        path = os.path.join(util.get_ulmo_dir(), DEFAULT_FILE_PATH)

    for tile in tiles['features']:
        metadata = tile['properties']
        layer_path = os.path.join(path, layer_dict[metadata['layer']])
        tile['properties']['file'] = util.download_tiles(
            layer_path, metadata['download url'], metadata['format'],
            check_modified)[0]

    return tiles
def _get_file_index(path=None, update_cache=False):
    """Non-webservice approach for caching the file index.

    Experimental, not currently in use.
    """
    if path is None:
        path = os.path.join(util.get_ulmo_dir(), DEFAULT_FILE_PATH)

    filename = os.path.join(path, 'index.json')

    if not os.path.exists(filename) or update_cache:
        for dirname in layer_dict.values():
            layer_path = os.path.join(path, dirname, 'zip')
            if not os.path.exists(layer_path):
                os.makedirs(layer_path)
        _update_file_index(filename)

    with open(filename) as f:
        return json.load(f)
def get_services(bbox=None, user_cache=False):
    """Retrieves a list of services.

    Parameters
    ----------
    bbox : ``None`` or 4-tuple
        Optional argument for a bounding box that covers the area you want to
        look for services in. This should be a tuple containing
        (min_longitude, min_latitude, max_longitude, max_latitude) with these
        values in decimal degrees. If not provided then the full set of
        services will be queried from HIS Central.
    user_cache : bool
        If False (default), use the system temp location to store cached WSDL
        and other files. Use the default user ulmo directory if True.

    Returns
    -------
    services_dicts : list
        A list of dicts that each contain information on an individual
        service.
    """
    if user_cache:
        cache_dir = os.path.join(util.get_ulmo_dir(), 'suds')
        util.mkdir_if_doesnt_exist(cache_dir)
        suds_client = suds.client.Client(
            HIS_CENTRAL_WSDL_URL, cache=ObjectCache(location=cache_dir))
    else:
        suds_client = suds.client.Client(HIS_CENTRAL_WSDL_URL)

    if bbox is None:
        services = suds_client.service.GetWaterOneFlowServiceInfo()
    else:
        x_min, y_min, x_max, y_max = bbox
        services = suds_client.service.GetServicesInBox2(
            xmin=x_min, ymin=y_min, xmax=x_max, ymax=y_max)

    services = [
        _service_dict(service_info)
        for service_info in services.ServiceInfo
    ]
    return services
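# Usage sketch (hedged example): the bounding box below roughly covers Texas
# and is an example input only:
#
#     all_services = get_services()
#     tx_services = get_services(bbox=(-106.6, 25.8, -93.5, 36.5),
#                                user_cache=True)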
.. _Lower Colorado River Authority: http://www.lcra.org
.. _Water Quality: http://waterquality.lcra.org/
"""
import logging
import os.path as op

from bs4 import BeautifulSoup
import dateutil
from geojson import Point, Feature, FeatureCollection
import pandas as pd
import requests

from ulmo import util

LCRA_WATERQUALITY_DIR = op.join(util.get_ulmo_dir(), 'lcra/waterquality')

log = logging.getLogger(__name__)

source_map = {
    'LCRA': 'Lower Colorado River Authority',
    'UCRA': 'Upper Colorado River Authority',
    'CRMWD': 'Colorado River Municipal Water District',
    'COA': 'City of Austin',
    'TCEQ': 'Texas Commission on Environmental Quality',
}
""" import datetime import os.path from bs4 import BeautifulSoup import numpy as np import pandas from ulmo import util try: import cStringIO as StringIO except ImportError: import StringIO USACE_SWTWC_DIR = os.path.join(util.get_ulmo_dir(), 'usace/swtwc') def get_station_data(station_code, date=None, as_dataframe=False): """Fetches data for a station at a given date. Parameters ---------- station_code: str The station code to fetch data for. A list of stations can be retrieved with ``get_stations()`` date : ``None`` or date (see :ref:`dates-and-times`) The date of the data to be queried. If date is ``None`` (default), then data for the current day is retreived. as_dataframe : bool
from __future__ import division

from builtins import str
from builtins import range
from past.utils import old_div

import datetime
import os

import numpy as np
import pandas
import requests

from ulmo import util

# directory where drought data will be stashed
CPC_DROUGHT_DIR = os.path.join(util.get_ulmo_dir(), 'cpc/drought')

# state codes (note: these are not FIPS codes)
STATE_CODES = {
    'AL': 1,
    'AZ': 2,
    'AR': 3,
    'CA': 4,
    'CO': 5,
    'CT': 6,
    'DE': 7,
    'FL': 8,
    'GA': 9,
    'IA': 13,
    'ID': 10,
    'IL': 11,
from builtins import str
from builtins import range
from past.builtins import basestring

from contextlib import contextmanager
import csv
import datetime
import gzip
import itertools
import os
import tarfile

import numpy as np

from ulmo import util

NCDC_GSOD_DIR = os.path.join(util.get_ulmo_dir(), 'ncdc/gsod')
NCDC_GSOD_STATIONS_FILE = os.path.join(NCDC_GSOD_DIR, 'isd-history.csv')
NCDC_GSOD_START_DATE = datetime.date(1929, 1, 1)


def get_parameters():
    """Retrieves a list of available parameter codes.

    Reference for GSOD parameters:
    https://www1.ncdc.noaa.gov/pub/data/gsod/readme.txt

    Parameters
    ----------
    None

    Returns
    -------
    dictionary of variables with parameter codes as keys
.. _National Climatic Data Center: http://www.ncdc.noaa.gov
.. _Global Historical Climate Network - Daily: http://www.ncdc.noaa.gov/oa/climate/ghcn-daily/
"""
from builtins import str
from builtins import range
from past.builtins import basestring

import itertools
import os

import numpy as np
import pandas

from ulmo import util

GHCN_DAILY_DIR = os.path.join(util.get_ulmo_dir(), 'ncdc/ghcn_daily')


def get_data(station_id, elements=None, update=True, as_dataframe=False):
    """Retrieves data for a given station.

    Parameters
    ----------
    station_id : str
        Station ID to retrieve data for.
    elements : ``None``, str, or list of str
        If specified, limits the query to given element code(s).
    update : bool
        If ``True`` (default), new data files will be downloaded if they are
        newer than any previously cached files. If ``False``, then previously
~~~~~~~~~~~~~~~~~~~

This module provides direct access to the `National Climatic Data Center`_
`Climate Index Reference Sequential (CIRS)`_ drought dataset.

.. _National Climatic Data Center: http://www.ncdc.noaa.gov
.. _Climate Index Reference Sequential (CIRS): http://www1.ncdc.noaa.gov/pub/data/cirs/
"""
import os.path

import pandas

from ulmo import util

CIRS_DIR = util.get_ulmo_dir('ncdc/cirs')

NO_DATA_VALUES = {
    'cdd': '-9999.',
    'hdd': '-9999.',
    'pcp': '-9.99',
    'pdsi': '-99.99',
    'phdi': '-99.99',
    'pmdi': '-99.99',
    'sp01': '-99.99',
    'sp02': '-99.99',
    'sp03': '-99.99',
    'sp06': '-99.99',
    'sp09': '-99.99',
    'sp12': '-99.99',
    'sp24': '-99.99',
This module provides access to data provided by the `United States Army
Corps of Engineers`_ `Rivergages`_ web site.

.. _United States Army Corps of Engineers: http://www.usace.army.mil/
.. _Rivergages: http://rivergages.mvr.usace.army.mil/WaterControl/new/layout.cfm
"""
import datetime
import os.path

import requests
from bs4 import BeautifulSoup

from ulmo import util

USACE_RIVERGAGES_DIR = os.path.join(util.get_ulmo_dir(), 'usace/rivergages/')
URL = 'http://rivergages.mvr.usace.army.mil/WaterControl/datamining2.cfm'
DEFAULT_START_DATE = datetime.date(1800, 1, 1)


def get_stations():
    path = os.path.join(USACE_RIVERGAGES_DIR, 'datamining_field_list.cfm')
    with util.open_file_for_url(URL, path, use_bytes=True) as f:
        soup = BeautifulSoup(f)
        options = soup.find('select', id='fld_station').find_all('option')
        stations = _parse_options(options)

    return stations
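# Usage sketch: the station list page is downloaded once, cached under
# USACE_RIVERGAGES_DIR, and parsed into stations. The station code below is
# a hypothetical placeholder:
#
#     stations = get_stations()
#     print(stations.get('XYZT2'))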