Example #1
def _download_features(
    feature_ids,
    path=None,
    check_modified=False,
):
    if path is None:
        path = os.path.join(util.get_ulmo_dir(), DEFAULT_FILE_PATH)

    if isinstance(feature_ids, basestring):
        feature_ids = [feature_ids]

    tiles = []
    tile_fmt = '.img'
    for feature_id in feature_ids:
        url = SCIENCEBASE_ITEM_URL % feature_id
        metadata = requests.get(url).json()
        layer = [a for a in list(layer_dict.keys())
                 if a in metadata['title']][0]
        layer_path = os.path.join(path, layer_dict[layer])
        tile_urls = [
            link['uri'] for link in metadata['webLinks']
            if link['type'] == 'download'
        ]
        tiles.append({
            'feature_id': feature_id,
            'tiles': util.download_tiles(layer_path, tile_urls, tile_fmt,
                                         check_modified),
        })

    return tiles
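
A minimal usage sketch (hedged): the ScienceBase feature ID below is a
hypothetical placeholder, and the module-level constants SCIENCEBASE_ITEM_URL
and layer_dict are assumed to be defined as in the surrounding module.

downloaded = _download_features('5466b2a9e4b0...', check_modified=True)
for item in downloaded:
    # each entry pairs a feature id with the local paths of its tiles
    print(item['feature_id'], item['tiles'])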
Example #2
File: core.py Project: emiliom/ulmo
def get_data(county=None, start=None, end=None, as_dataframe=False, data_dir=None):
    """Retreives data.

    Parameters
    ----------
    county : ``None`` or str
        If specified, results will be limited to the county corresponding to
        the given 5-character Texas county FIPS code (i.e. 48???).
    end : ``None`` or date (see :ref:`dates-and-times`)
        Results will be limited to data on or before this date. Default is the
        current date.
    start : ``None`` or date (see :ref:`dates-and-times`)
        Results will be limited to data on or after this date. Default is the
        start of the calendar year for the end date.
    as_dataframe : bool
        If ``False`` (default), a dict with a nested set of dicts will be
        returned with data indexed by 5-character Texas county FIPS code. If ``True``
        then a pandas.DataFrame object will be returned.  The pandas dataframe
        is used internally, so setting this to ``True`` is a little bit faster
        as it skips a serialization step.
    data_dir : ``None`` or directory path
        Directory for holding downloaded data files. If no path is provided
        (default), then a user-specific directory for holding application data
        will be used (the directory will depend on the platform/operating
        system).


    Returns
    -------
    data : dict or pandas.DataFrame
        A dict or pandas.DataFrame representing the data. See the
        ``as_dataframe`` parameter for more.
    """
    if end is None:
        end_date = datetime.date.today()
    else:
        end_date = util.convert_date(end)
    if start is None:
        start_date = datetime.date(end_date.year, 1, 1)
    else:
        start_date = util.convert_date(start)
    if data_dir is None:
        data_dir = os.path.join(util.get_ulmo_dir(), 'twc/kbdi')

    df = pandas.concat([
        _date_dataframe(date, data_dir)
        for date in pandas.period_range(start_date, end_date, freq='D')
    ], ignore_index=True)
    fips_df = _fips_dataframe()
    df = pandas.merge(df, fips_df, left_on='county', right_on='name')
    del df['name']

    if county:
        df = df[df['fips'] == county]

    if as_dataframe:
        return df
    else:
        return _as_data_dict(df)
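
A hedged usage sketch: '48453' (Travis County, TX) is an illustrative FIPS
code, and it is assumed that util.convert_date accepts ISO date strings.

# limit results to one county and the first half of 2015
df = get_data(county='48453', start='2015-01-01', end='2015-06-30',
              as_dataframe=True)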
Example #3
def _get_store_path(path, default_file_name):
    if path is None:
        path = os.path.join(util.get_ulmo_dir(), DEFAULT_FILE_PATH)

    if not os.path.exists(path):
        os.makedirs(path)

    return os.path.join(path, default_file_name)
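
A hedged usage sketch (the file name is illustrative): passing path=None
falls back to the default ulmo directory, creating it if needed.

store_path = _get_store_path(None, 'data.h5')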
Example #4
File: core.py Project: wilsaj/ulmo
def _get_store_path(path, default_file_name):
    if path is None:
        path = os.path.join(util.get_ulmo_dir(), DEFAULT_FILE_PATH)

    if not os.path.exists(path):
        os.makedirs(path)

    return os.path.join(path, default_file_name)
Example #5
def get_raster(layer,
               bbox,
               path=None,
               update_cache=False,
               check_modified=False,
               mosaic=False):
    """downloads National Elevation Dataset raster tiles that cover the given bounding box
    for the specified data layer.

    Parameters
    ----------
    layer : str
        dataset layer name. (see get_available_layers for list)
    bbox : (sequence of float|str)
        bounding box of in geographic coordinates of area to download tiles
        in the format (min longitude, min latitude, max longitude, max latitude)
    path : ``None`` or path
        if ``None`` default path will be used
    update_cache: ``True`` or ``False`` (default)
        if ``False`` and output file already exists use it.
    check_modified: ``True`` or ``False`` (default)
        if tile exists in path, check if newer file exists online and download if available.
    mosaic: ``True`` or ``False`` (default)
        if ``True``, mosaic and clip downloaded tiles to the extents of the bbox provided. Requires
        rasterio package and GDAL.

    Returns
    -------
    raster_tiles : geojson FeatureCollection
        metadata as a FeatureCollection. local url of downloaded data is in feature['properties']['file']
    """
    _check_layer(layer)

    raster_tiles = _download_tiles(get_raster_availability(layer, bbox),
                                   path=path,
                                   check_modified=check_modified)

    if mosaic:
        if path is None:
            path = os.path.join(util.get_ulmo_dir(), DEFAULT_FILE_PATH)

        util.mkdir_if_doesnt_exist(os.path.join(path, 'by_boundingbox'))
        xmin, ymin, xmax, ymax = [float(n) for n in bbox]
        uid = util.generate_raster_uid(layer, xmin, ymin, xmax, ymax)
        output_path = os.path.join(path, 'by_boundingbox', uid + '.tif')

        if os.path.isfile(output_path) and not update_cache:
            # return a list for consistency with the freshly-mosaicked case
            return [output_path]

        raster_files = [
            tile['properties']['file'] for tile in raster_tiles['features']
        ]
        util.mosaic_and_clip(raster_files, xmin, ymin, xmax, ymax, output_path)
        return [output_path]

    return raster_tiles
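
A hedged usage sketch: the layer name and bounding box are illustrative. With
mosaic=True the return value is a one-element list holding the path of the
clipped GeoTIFF.

bbox = (-98.0, 30.0, -97.0, 31.0)  # (min lon, min lat, max lon, max lat)
paths = get_raster('1 arc-second', bbox, mosaic=True)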
Example #6
def _get_client(wsdl_url, suds_cache=("default",), suds_timeout=None, user_cache=False):
    """
    Open and re-use (persist) a suds.client.Client instance _suds_client throughout
    the session, to minimize WOF server impact and improve performance.  _suds_client
    is global in scope.

    Parameters
    ----------
    wsdl_url : str
        URL of a service's web service definition language (WSDL) description.
        All WaterOneFlow services publish a WSDL description and this url is the
        entry point to the service.
    suds_cache : ``None`` or tuple
        suds client local cache duration for WSDL description and client object.
        Pass a cache duration tuple like ('days', 3) to set a custom duration.
        Duration may be in months, weeks, days, hours, or seconds.
        If unspecified, the suds default (1 day) will be used.
        Use ``None`` to turn off caching.
    suds_timeout : int or float
        suds SOAP URL open timeout (seconds).
        If unspecified, the suds default (90 seconds) will be used.
    user_cache : bool
        If False (default), use the system temp location to store cached WSDL
        and other files. If True, use the default user ulmo directory.

    Returns
    -------
    _suds_client : suds Client
        Newly or previously instantiated (reused) suds Client object.
    """
    global _suds_client

    # Handle new or changed client request (create new client)
    if (_suds_client is None or _suds_client.wsdl.url != wsdl_url
            or suds_timeout is not None):
        if user_cache:
            cache_dir = os.path.join(util.get_ulmo_dir(), 'suds')
            util.mkdir_if_doesnt_exist(cache_dir)
            _suds_client = suds.client.Client(wsdl_url, cache=ObjectCache(location=cache_dir))
        else:
            _suds_client = suds.client.Client(wsdl_url)

        if suds_cache is None:
            _suds_client.set_options(cache=None)
        else:
            cache = _suds_client.options.cache
            # could add some error catching ...
            if suds_cache[0] == "default":
                cache.setduration(days=1)
            else:
                cache.setduration(**dict([suds_cache]))

        if suds_timeout is not None:
            _suds_client.set_options(timeout=suds_timeout)

    return _suds_client
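
A hedged usage sketch (the WSDL URL is illustrative): a ('days', 3) tuple
extends the local cache duration, while suds_cache=None turns caching off.

client = _get_client('http://example.com/waterml?WSDL', suds_cache=('days', 3))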
Example #7
def get_raster(product_key,
               bbox,
               fmt=None,
               path=None,
               update_cache=False,
               check_modified=False,
               mosaic=False):
    """downloads National Elevation Dataset raster tiles that cover the given bounding box 
    for the specified data layer. 

    Parameters
    ----------
    product_key : str
        dataset name. (see get_available_datasets for list)
    bbox : (sequence of float|str)
        bounding box of in geographic coordinates of area to download tiles 
        in the format (min longitude, min latitude, max longitude, max latitude)
    fmt : ``None`` or str
        available formats vary in different datasets. If ``None``, preference will be given
        to geotiff and then img, followed by whatever fmt is available
    path : ``None`` or path
        if ``None`` default path will be used
    update_cache: ``True`` or ``False`` (default)
        if ``False`` then tiles will not be re-downloaded if they exist in the path
    check_modified: ``True`` or ``False`` (default)
        if tile exists in path, check if newer file exists online and download if available.  
    mosaic: ``True`` or ``False`` (default)
        if ``True``, mosaic and clip downloaded tiles to the extents of the bbox provided. Requires
        rasterio package and GDAL.
        
    Returns
    -------
    raster_tiles : geojson FeatureCollection
        metadata as a FeatureCollection. local url of downloaded data is in feature['properties']['file']
    """
    raster_tiles = _download_tiles(
        get_raster_availability(product_key, bbox, fmt), path, check_modified)

    if mosaic:
        if path is None:
            path = os.path.join(util.get_ulmo_dir(), DEFAULT_FILE_PATH)
        util.mkdir_if_doesnt_exist(os.path.join(path, 'by_boundingbox'))
        xmin, ymin, xmax, ymax = [float(n) for n in bbox]
        uid = util.generate_raster_uid(product_key, xmin, ymin, xmax, ymax)
        output_path = os.path.join(path, 'by_boundingbox', uid + '.tif')

        if os.path.isfile(output_path) and not update_cache:
            # return a list for consistency with the freshly-mosaicked case
            return [output_path]

        raster_files = [
            tile['properties']['file'] for tile in raster_tiles['features']
        ]
        util.mosaic_and_clip(raster_files, xmin, ymin, xmax, ymax, output_path)

        return [output_path]

    return raster_tiles
Example #8
def _download_tiles(tiles, path=None, check_modified=False):

    if path is None:
        path = os.path.join(util.get_ulmo_dir(), DEFAULT_FILE_PATH)

    for tile in tiles['features']:

        metadata = tile['properties']
        layer_path = os.path.join(path, layer_dict[metadata['layer']])
        tile['properties']['file'] = util.download_tiles(
            layer_path, metadata['download url'], metadata['format'],
            check_modified)[0]

    return tiles
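
A hedged sketch of the input shape _download_tiles expects (all values are
illustrative): a GeoJSON-style FeatureCollection dict whose feature
properties carry a layer name (a key of layer_dict), a 'download url', and a
file format.

tiles = {
    'features': [
        {'properties': {'layer': '1 arc-second',
                        'download url': 'http://example.com/tile.zip',
                        'format': '.img'}},
    ]
}
result = _download_tiles(tiles)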
Example #9
File: core.py Project: emiliom/ulmo
def _download_tiles(tiles, path=None, check_modified=False):

    if path is None:
        path = os.path.join(util.get_ulmo_dir(), DEFAULT_FILE_PATH)

    for tile in tiles['features']:

        metadata = tile['properties']
        layer_path = os.path.join(path, layer_dict[metadata['layer']])
        tile['properties']['file'] = util.download_tiles(
            layer_path, metadata['download url'], metadata['format'],
            check_modified)[0]

    return tiles
Example #10
File: core.py Project: emiliom/ulmo
def get_raster(layer, bbox, path=None, update_cache=False,
               check_modified=False, mosaic=False):
    """downloads National Elevation Dataset raster tiles that cover the given bounding box
    for the specified data layer.

    Parameters
    ----------
    layer : str
        dataset layer name. (see get_available_layers for list)
    bbox : (sequence of float|str)
        bounding box of in geographic coordinates of area to download tiles
        in the format (min longitude, min latitude, max longitude, max latitude)
    path : ``None`` or path
        if ``None`` default path will be used
    update_cache: ``True`` or ``False`` (default)
        if ``False`` and output file already exists use it.
    check_modified: ``True`` or ``False`` (default)
        if tile exists in path, check if newer file exists online and download if available.
    mosaic: ``True`` or ``False`` (default)
        if ``True``, mosaic and clip downloaded tiles to the extents of the bbox provided. Requires
        rasterio package and GDAL.

    Returns
    -------
    raster_tiles : geojson FeatureCollection
        metadata as a FeatureCollection. local url of downloaded data is in feature['properties']['file']
    """
    _check_layer(layer)

    raster_tiles = _download_tiles(get_raster_availability(layer, bbox),
                                   path=path, check_modified=check_modified)

    if mosaic:
        if path is None:
            path = os.path.join(util.get_ulmo_dir(), DEFAULT_FILE_PATH)

        util.mkdir_if_doesnt_exist(os.path.join(path, 'by_boundingbox'))
        xmin, ymin, xmax, ymax = [float(n) for n in bbox]
        uid = util.generate_raster_uid(layer, xmin, ymin, xmax, ymax)
        output_path = os.path.join(path, 'by_boundingbox', uid + '.tif')

        if os.path.isfile(output_path) and not update_cache:
            # return a list for consistency with the freshly-mosaicked case
            return [output_path]

        raster_files = [tile['properties']['file'] for tile in raster_tiles['features']]
        util.mosaic_and_clip(raster_files, xmin, ymin, xmax, ymax, output_path)
        return [output_path]

    return raster_tiles
Example #11
def get_raster(product_key, bbox, fmt=None, path=None, update_cache=False,
               check_modified=False, mosaic=False):
    """downloads National Elevation Dataset raster tiles that cover the given bounding box 
    for the specified data layer. 

    Parameters
    ----------
    product_key : str
        dataset name. (see get_available_datasets for list)
    bbox : (sequence of float|str)
        bounding box of in geographic coordinates of area to download tiles 
        in the format (min longitude, min latitude, max longitude, max latitude)
    fmt : ``None`` or str
        available formats vary in different datasets. If ``None``, preference will be given
        to geotiff and then img, followed by whatever fmt is available
    path : ``None`` or path
        if ``None`` default path will be used
    update_cache: ``True`` or ``False`` (default)
        if ``False`` then tiles will not be re-downloaded if they exist in the path
    check_modified: ``True`` or ``False`` (default)
        if tile exists in path, check if newer file exists online and download if available.  
    mosaic: ``True`` or ``False`` (default)
        if ``True``, mosaic and clip downloaded tiles to the extents of the bbox provided. Requires
        rasterio package and GDAL.
        
    Returns
    -------
    raster_tiles : geojson FeatureCollection
        metadata as a FeatureCollection. local url of downloaded data is in feature['properties']['file']
    """
    raster_tiles = _download_tiles(get_raster_availability(product_key, bbox, fmt),
                                   path, check_modified)

    if mosaic:
        if path is None:
            path = os.path.join(util.get_ulmo_dir(), DEFAULT_FILE_PATH)
        util.mkdir_if_doesnt_exist(os.path.join(path, 'by_boundingbox'))
        xmin, ymin, xmax, ymax = [float(n) for n in bbox]
        uid = util.generate_raster_uid(product_key, xmin, ymin, xmax, ymax)
        output_path = os.path.join(path, 'by_boundingbox', uid + '.tif')

        if os.path.isfile(output_path) and not update_cache:
            # return a list for consistency with the freshly-mosaicked case
            return [output_path]

        raster_files = [tile['properties']['file']
                        for tile in raster_tiles['features']]
        util.mosaic_and_clip(raster_files, xmin, ymin, xmax, ymax, output_path)

        return [output_path]

    return raster_tiles
Example #12
def _get_file_index(path=None, update_cache=False):
    """Non webservice approach for caching file index

    Experimental, not currently in use.
    """
    if path is None:
        path = os.path.join(util.get_ulmo_dir(), DEFAULT_FILE_PATH)

    filename = os.path.join(path, 'index.json')

    if not os.path.exists(filename) or update_cache:
        for dirname in layer_dict.values():
            layer_path = os.path.join(path, dirname, 'zip')
            if not os.path.exists(layer_path):
                os.makedirs(layer_path)

        _update_file_index(filename)

    with open(filename) as f:
        return json.load(f)
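
A hedged usage sketch: update_cache=True rebuilds index.json under the
default path before it is loaded.

index = _get_file_index(update_cache=True)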
Example #13
def _get_file_index(path=None, update_cache=False):
    """Non webservice approach for caching file index

    Experimental, not currently in use.
    """
    if path is None:
        path = os.path.join(util.get_ulmo_dir(), DEFAULT_FILE_PATH)

    filename = os.path.join(path, 'index.json')

    if not os.path.exists(filename) or update_cache:
        for dirname in layer_dict.values():
            layer_path = os.path.join(path, dirname, 'zip')
            if not os.path.exists(layer_path):
                os.makedirs(layer_path)

        _update_file_index(filename)

    with open(filename) as f:
        return json.load(f)
Example #14
File: core.py Project: emiliom/ulmo
def _download_features(feature_ids, path=None, check_modified=False):
    if path is None:
        path = os.path.join(util.get_ulmo_dir(), DEFAULT_FILE_PATH)

    if isinstance(feature_ids, basestring):
        feature_ids = [feature_ids]

    tiles = []
    tile_fmt = '.img'
    for feature_id in feature_ids:
        url = SCIENCEBASE_ITEM_URL % feature_id
        metadata = requests.get(url).json()
        layer = [a for a in list(layer_dict.keys()) if a in metadata['title']][0]
        layer_path = os.path.join(path, layer_dict[layer])
        tile_urls = [link['uri'] for link in metadata['webLinks']
                     if link['type'] == 'download']
        tiles.append({
            'feature_id': feature_id,
            'tiles': util.download_tiles(layer_path, tile_urls, tile_fmt,
                                         check_modified),
        })

    return tiles
Example #15
def get_services(bbox=None, user_cache=False):
    """Retrieves a list of services.

    Parameters
    ----------
    bbox : ``None`` or 4-tuple
        Optional argument for a bounding box that covers the area you want to
        look for services in. This should be a tuple containing
        (min_longitude, min_latitude, max_longitude, max_latitude), with
        these values in decimal degrees. If not provided, then the full set
        of services will be queried from HIS Central.
    user_cache : bool
        If False (default), use the system temp location to store cached WSDL
        and other files. If True, use the default user ulmo directory.

    Returns
    -------
    services_dicts : list
        A list of dicts that each contain information on an individual service.
    """
    if user_cache:
        cache_dir = os.path.join(util.get_ulmo_dir(), 'suds')
        util.mkdir_if_doesnt_exist(cache_dir)
        suds_client = suds.client.Client(HIS_CENTRAL_WSDL_URL,
                                         cache=ObjectCache(location=cache_dir))
    else:
        suds_client = suds.client.Client(HIS_CENTRAL_WSDL_URL)

    if bbox is None:
        services = suds_client.service.GetWaterOneFlowServiceInfo()
    else:
        x_min, y_min, x_max, y_max = bbox
        services = suds_client.service.GetServicesInBox2(
            xmin=x_min, ymin=y_min, xmax=x_max, ymax=y_max)

    services = [
        _service_dict(service_info)
        for service_info in services.ServiceInfo
    ]
    return services
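
A hedged usage sketch (the bounding box, roughly covering Texas, is
illustrative):

services = get_services(bbox=(-106.6, 25.8, -93.5, 36.5))
for service in services:
    print(service)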
Example #16
    .. _Lower Colorado River Authority: http://www.lcra.org
    .. _Water Quality: http://waterquality.lcra.org/
"""
import logging
import os.path as op

from bs4 import BeautifulSoup
import dateutil
from geojson import Point, Feature, FeatureCollection
import pandas as pd
import requests

from ulmo import util

LCRA_WATERQUALITY_DIR = op.join(util.get_ulmo_dir(), 'lcra/waterquality')

log = logging.getLogger(__name__)

source_map = {
    'LCRA': 'Lower Colorado River Authority',
    'UCRA': 'Upper Colorado River Authority',
    'CRMWD': 'Colorado River Municipal Water District',
    'COA': 'City of Austin',
    'TCEQ': 'Texas Commission on Environmental Quality',
}
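
A hedged sketch of how source_map might be used (the lookup and fallback here
are illustrative):

agency = source_map.get('LCRA', 'Unknown source')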
Example #17
"""
import datetime
import os.path

from bs4 import BeautifulSoup
import numpy as np
import pandas

from ulmo import util

try:
    import cStringIO as StringIO
except ImportError:
    import StringIO

USACE_SWTWC_DIR = os.path.join(util.get_ulmo_dir(), 'usace/swtwc')


def get_station_data(station_code, date=None, as_dataframe=False):
    """Fetches data for a station at a given date.


    Parameters
    ----------
    station_code: str
        The station code to fetch data for. A list of stations can be retrieved with
        ``get_stations()``
    date : ``None`` or date (see :ref:`dates-and-times`)
        The date of the data to be queried. If date is ``None`` (default), then
        data for the current day is retreived.
    as_dataframe : bool
Example #18
from __future__ import division
from builtins import str
from builtins import range
from past.utils import old_div

import datetime
import os
import requests

import numpy as np
import pandas

from ulmo import util

# directory where drought data will be stashed
CPC_DROUGHT_DIR = os.path.join(util.get_ulmo_dir(), 'cpc/drought')

# state codes (note: these are not FIPS codes)
STATE_CODES = {
    'AL': 1,
    'AZ': 2,
    'AR': 3,
    'CA': 4,
    'CO': 5,
    'CT': 6,
    'DE': 7,
    'FL': 8,
    'GA': 9,
    'IA': 13,
    'ID': 10,
    'IL': 11,
Example #19
File: core.py Project: wilsaj/ulmo
"""
import datetime
import os.path

from bs4 import BeautifulSoup
import numpy as np
import pandas

from ulmo import util

try:
    import cStringIO as StringIO
except ImportError:
    import StringIO

USACE_SWTWC_DIR = os.path.join(util.get_ulmo_dir(), 'usace/swtwc')


def get_station_data(station_code, date=None, as_dataframe=False):
    """Fetches data for a station at a given date.


    Parameters
    ----------
    station_code: str
        The station code to fetch data for. A list of stations can be retrieved with
        ``get_stations()``
    date : ``None`` or date (see :ref:`dates-and-times`)
        The date of the data to be queried. If date is ``None`` (default), then
        data for the current day is retreived.
    as_dataframe : bool
Example #20
from builtins import str
from builtins import range
from past.builtins import basestring
from contextlib import contextmanager
import csv
import datetime
import gzip
import itertools
import os
import tarfile

import numpy as np

from ulmo import util

NCDC_GSOD_DIR = os.path.join(util.get_ulmo_dir(), 'ncdc/gsod')
NCDC_GSOD_STATIONS_FILE = os.path.join(NCDC_GSOD_DIR, 'isd-history.csv')
NCDC_GSOD_START_DATE = datetime.date(1929, 1, 1)

def get_parameters():
    """
    Retrieve a list of available parameter codes.
    Reference for GSOD parameters: https://www1.ncdc.noaa.gov/pub/data/gsod/readme.txt

    Parameters
    ----------
    None

    Returns
    -------
    dictionary of variables, with parameter codes as keys
Example #21
    .. _National Climatic Data Center: http://www.ncdc.noaa.gov
    .. _Global Historical Climate Network - Daily: http://www.ncdc.noaa.gov/oa/climate/ghcn-daily/

"""
from builtins import str
from builtins import range
from past.builtins import basestring
import itertools
import os

import numpy as np
import pandas

from ulmo import util

GHCN_DAILY_DIR = os.path.join(util.get_ulmo_dir(), 'ncdc/ghcn_daily')


def get_data(station_id, elements=None, update=True, as_dataframe=False):
    """Retrieves data for a given station.


    Parameters
    ----------
    station_id : str
        Station ID to retrieve data for.
    elements : ``None``, str, or list of str
        If specified, limits the query to given element code(s).
    update : bool
        If ``True`` (default), new data files will be downloaded if they are
        newer than any previously cached files. If ``False``, then previously
Example #22
File: core.py Project: wilsaj/ulmo
    ~~~~~~~~~~~~~~~~~~~

    This module provides direct access to the `National Climatic Data Center`_
    `Climate Index Reference Sequential (CIRS)`_ drought dataset.

    .. _National Climatic Data Center: http://www.ncdc.noaa.gov
    .. _Climate Index Reference Sequential (CIRS): http://www1.ncdc.noaa.gov/pub/data/cirs/
"""
import os.path

import pandas

from ulmo import util


CIRS_DIR = util.get_ulmo_dir('ncdc/cirs')

NO_DATA_VALUES = {
    'cdd': '-9999.',
    'hdd': '-9999.',
    'pcp': '-9.99',
    'pdsi': '-99.99',
    'phdi': '-99.99',
    'pmdi': '-99.99',
    'sp01': '-99.99',
    'sp02': '-99.99',
    'sp03': '-99.99',
    'sp06': '-99.99',
    'sp09': '-99.99',
    'sp12': '-99.99',
    'sp24': '-99.99',
Example #23
    This module provides access to data provided by the `United States Army
    Corps of Engineers`_ `Rivergages`_ web site.

    .. _United States Army Corps of Engineers: http://www.usace.army.mil/
    .. _Rivergages: http://rivergages.mvr.usace.army.mil/WaterControl/new/layout.cfm
"""
import datetime
import os.path

import requests
from bs4 import BeautifulSoup

from ulmo import util

USACE_RIVERGAGES_DIR = os.path.join(util.get_ulmo_dir(), 'usace/rivergages/')
URL = 'http://rivergages.mvr.usace.army.mil/WaterControl/datamining2.cfm'
DEFAULT_START_DATE = datetime.date(1800, 1, 1)


def get_stations():
    path = os.path.join(USACE_RIVERGAGES_DIR, 'datamining_field_list.cfm')

    with util.open_file_for_url(URL, path, use_bytes=True) as f:
        soup = BeautifulSoup(f)
        options = soup.find('select', id='fld_station').find_all('option')
        stations = _parse_options(options)

    return stations
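
_parse_options is not shown in this excerpt; a hypothetical sketch, assuming
each <option> element carries a station code in its 'value' attribute and a
station name as its text:

def _parse_options(options):
    # hypothetical helper: pair each option's station code with its name
    return dict(
        (option.get('value'), option.text.strip())
        for option in options
        if option.get('value')
    )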

Example #24
def get_data(county=None,
             start=None,
             end=None,
             as_dataframe=False,
             data_dir=None):
    """Retreives data.

    Parameters
    ----------
    county : ``None`` or str
        If specified, results will be limited to the county corresponding to
        the given 5-character Texas county FIPS code (i.e. 48???).
    end : ``None`` or date (see :ref:`dates-and-times`)
        Results will be limited to data on or before this date. Default is the
        current date.
    start : ``None`` or date (see :ref:`dates-and-times`)
        Results will be limited to data on or after this date. Default is the
        start of the calendar year for the end date.
    as_dataframe : bool
        If ``False`` (default), a dict with a nested set of dicts will be
        returned with data indexed by 5-character Texas county FIPS code. If ``True``
        then a pandas.DataFrame object will be returned.  The pandas dataframe
        is used internally, so setting this to ``True`` is a little bit faster
        as it skips a serialization step.
    data_dir : ``None`` or directory path
        Directory for holding downloaded data files. If no path is provided
        (default), then a user-specific directory for holding application data
        will be used (the directory will depend on the platform/operating
        system).


    Returns
    -------
    data : dict or pandas.DataFrame
        A dict or pandas.DataFrame representing the data. See the
        ``as_dataframe`` parameter for more.
    """
    if end is None:
        end_date = datetime.date.today()
    else:
        end_date = util.convert_date(end)
    if start is None:
        start_date = datetime.date(end_date.year, 1, 1)
    else:
        start_date = util.convert_date(start)
    if data_dir is None:
        data_dir = os.path.join(util.get_ulmo_dir(), 'twc/kbdi')

    df = pandas.concat([
        _date_dataframe(date, data_dir)
        for date in pandas.period_range(start_date, end_date, freq='D')
    ], ignore_index=True)
    fips_df = _fips_dataframe()
    df = pandas.merge(df, fips_df, left_on='county', right_on='name')
    del df['name']

    if county:
        df = df[df['fips'] == county]

    if as_dataframe:
        return df
    else:
        return _as_data_dict(df)
Example #25
    .. _Lower Colorado River Authority: http://www.lcra.org
    .. _Water Quality: http://waterquality.lcra.org/
"""
import logging
import os.path as op

from bs4 import BeautifulSoup
import dateutil
from geojson import Point, Feature, FeatureCollection
import pandas as pd
import requests

from ulmo import util

LCRA_WATERQUALITY_DIR = op.join(util.get_ulmo_dir(), 'lcra/waterquality')

log = logging.getLogger(__name__)

source_map = {
    'LCRA': 'Lower Colorado River Authority',
    'UCRA': 'Upper Colorado River Authority',
    'CRMWD': 'Colorado River Municipal Water District',
    'COA': 'City of Austin',
    'TCEQ': 'Texas Commission on Environmental Quality',
Example #26
File: core.py Project: wilsaj/ulmo

    .. _National Climatic Data Center: http://www.ncdc.noaa.gov
    .. _Global Historical Climate Network - Daily: http://www.ncdc.noaa.gov/oa/climate/ghcn-daily/

"""
import itertools
import os

import numpy as np
import pandas

from ulmo import util


GHCN_DAILY_DIR = os.path.join(util.get_ulmo_dir(), 'ncdc/ghcn_daily')


def get_data(station_id, elements=None, update=True, as_dataframe=False):
    """Retrieves data for a given station.


    Parameters
    ----------
    station_id : str
        Station ID to retrieve data for.
    elements : ``None``, str, or list of str
        If specified, limits the query to given element code(s).
    update : bool
        If ``True`` (default), new data files will be downloaded if they are
        newer than any previously cached files. If ``False``, then previously
Example #27
    This module provides access to data provided by the `United States Army
    Corps of Engineers`_ `Rivergages`_ web site.

    .. _United States Army Corps of Engineers: http://www.usace.army.mil/
    .. _Rivergages: http://rivergages.mvr.usace.army.mil/WaterControl/new/layout.cfm
"""
import datetime
import os.path

import requests
from bs4 import BeautifulSoup

from ulmo import util

USACE_RIVERGAGES_DIR = os.path.join(util.get_ulmo_dir(), 'usace/rivergages/')
URL = 'http://rivergages.mvr.usace.army.mil/WaterControl/datamining2.cfm'
DEFAULT_START_DATE = datetime.date(1800, 1, 1)


def get_stations():
    path = os.path.join(USACE_RIVERGAGES_DIR, 'datamining_field_list.cfm')

    with util.open_file_for_url(URL, path) as f:
        soup = BeautifulSoup(f)
        options = soup.find('select', id='fld_station').find_all('option')
        stations = _parse_options(options)

    return stations