def remove_values(site_code, datetime_dicts, path=None, complevel=None,
                  complib=None, autorepack=True):
    """Set stored values to NaN at the given datetimes in a site's hdf5 file.

    Parameters
    ----------
    site_code : str
        The site code of the site to remove records from.
    datetime_dicts : dict
        Maps a variable code (key) to a list of datetimes whose values
        should be set to NaN. Each datetime is passed through
        ``util.convert_datetime`` before being matched against the index.
    path : file path to hdf5 file.
    complevel, complib :
        Forwarded to ``_compression_kwargs`` when opening the store and to
        ``repack``.
    autorepack : bool
        If true (default), ``repack`` is called on the file, but only when
        at least one variable's values were actually rewritten.

    Returns
    -------
    None : ``None``
    """
    site_data_path = _get_store_path(path, site_code + '.h5')
    comp_kwargs = _compression_kwargs(complevel=complevel, complib=complib)

    something_changed = False
    with _get_store(site_data_path, mode='a', **comp_kwargs) as store:
        site_group = store.get_node(site_code)
        if site_group is None:
            core.log.warning("No site group found for site %s in %s" %
                             (site_code, site_data_path))
            return

        for variable_code, datetimes in datetime_dicts.items():
            variable_group_path = site_code + '/' + variable_code
            values_path = variable_group_path + '/' + 'values'

            if values_path not in store:
                core.log.warning("Values path %s not found in %s." %
                                 (values_path, site_data_path))
                continue

            datetimes = [util.convert_datetime(dt) for dt in datetimes]
            values_df = store[values_path]
            # Only rows that currently hold data are candidates for removal.
            original_datetimes = set(
                values_df.dropna(how='all').index.tolist())
            datetimes_to_remove = original_datetimes.intersection(datetimes)
            if not datetimes_to_remove:
                core.log.info("No %s values matching the given datetimes to "
                              "remove were found." % variable_code)
                continue

            # ``.ix`` was removed from pandas; every label here comes from
            # the frame's own index, so label-based ``.loc`` is equivalent.
            values_df.loc[list(datetimes_to_remove), 'value'] = np.nan
            core.log.info("%i %s values were set to NaNs in file" %
                          (len(datetimes_to_remove), variable_code))

            store[values_path] = values_df
            something_changed = True

    # Repack after the store has been closed by the ``with`` block.
    if autorepack and something_changed:
        repack(site_data_path, complevel=complevel, complib=complib)
def test_convert_datetime_from_date():
    """A ``datetime.date`` input must compare equal to the expected datetime."""
    cases = [
        (datetime.date(2011, 12, 31),
         datetime.datetime(2011, 12, 31, 0, 0, 0)),
    ]

    for given_date, expected in cases:
        assert util.convert_datetime(given_date) == expected
def test_convert_datetime_from_datetime():
    """A ``datetime.datetime`` input must come back equal to itself."""
    samples = (
        datetime.datetime(2011, 12, 31),
        datetime.datetime(2011, 12, 31, 3, 30),
        datetime.datetime(2011, 12, 31, 1, 30, 19),
        datetime.datetime(2011, 12, 31, 0, 0, 1),
        datetime.datetime(2012, 2, 29),
        datetime.datetime(2013, 1, 1),
    )

    for sample in samples:
        assert util.convert_datetime(sample) == sample
def _parse_current_values(site_el):
    """Build a dict of current values from the children of ``site_el``.

    Keys are the lower-cased child element names. The ``location`` child
    maps to its stripped text. Any other child whose text is blank maps to
    ``None``; otherwise ``datetime`` is passed through
    ``util.convert_datetime`` and everything else is converted with
    ``float``.
    """
    site_values = {}

    for child in site_el.findChildren():
        key = child.name.lower()
        raw_text = child.get_text()
        stripped = raw_text.strip()

        if key == 'location':
            # Location is kept as text, even when it is empty.
            site_values[key] = stripped
        elif stripped == '':
            site_values[key] = None
        elif key == 'datetime':
            site_values[key] = util.convert_datetime(raw_text)
        else:
            site_values[key] = float(raw_text)

    return site_values
def test_convert_datetime_from_string():
    """Each date/time string must convert to the paired datetime."""
    cases = [
        ('2011-12-31', datetime.datetime(2011, 12, 31)),
        ('2011-12-31 4:28', datetime.datetime(2011, 12, 31, 4, 28)),
        ('2011-12-31 4:28:15', datetime.datetime(2011, 12, 31, 4, 28, 15)),
        ('12/31/2011', datetime.datetime(2011, 12, 31)),
        ('12/31/2011 1:30:29', datetime.datetime(2011, 12, 31, 1, 30, 29)),
        ('12/31/2011 01:30:29', datetime.datetime(2011, 12, 31, 1, 30, 29)),
        ('12/31/2011 01:30', datetime.datetime(2011, 12, 31, 1, 30, 0)),
        ('2012-02-29', datetime.datetime(2012, 2, 29)),
        ('2012-2-29', datetime.datetime(2012, 2, 29)),
        ('2/29/2012', datetime.datetime(2012, 2, 29)),
        ('02/29/2012', datetime.datetime(2012, 2, 29)),
        ('2013-01-01', datetime.datetime(2013, 1, 1)),
    ]

    for text, expected in cases:
        assert util.convert_datetime(text) == expected
def test_convert_datetime_from_string():
    """String inputs in several layouts must convert to the paired datetime."""
    string_to_expected = (
        ("2011-12-31", datetime.datetime(2011, 12, 31)),
        ("2011-12-31 4:28", datetime.datetime(2011, 12, 31, 4, 28)),
        ("2011-12-31 4:28:15", datetime.datetime(2011, 12, 31, 4, 28, 15)),
        ("12/31/2011", datetime.datetime(2011, 12, 31)),
        ("12/31/2011 1:30:29", datetime.datetime(2011, 12, 31, 1, 30, 29)),
        ("12/31/2011 01:30:29", datetime.datetime(2011, 12, 31, 1, 30, 29)),
        ("12/31/2011 01:30", datetime.datetime(2011, 12, 31, 1, 30, 0)),
        ("2012-02-29", datetime.datetime(2012, 2, 29)),
        ("2012-2-29", datetime.datetime(2012, 2, 29)),
        ("2/29/2012", datetime.datetime(2012, 2, 29)),
        ("02/29/2012", datetime.datetime(2012, 2, 29)),
        ("2013-01-01", datetime.datetime(2013, 1, 1)),
    )

    for date_string, expected in string_to_expected:
        result = util.convert_datetime(date_string)
        assert result == expected
def get_recent_data(site_code, as_dataframe=False):
    """Fetch near real-time instantaneous water quality data for an LCRA bay
    site.

    Parameters
    ----------
    site_code : str
        The bay site to fetch data for. see `real_time_sites`
    as_dataframe : bool
        This determines what format values are returned as. If ``False``
        (default), the values are converted with
        ``util.dict_from_dataframe``. If ``True`` then values are returned
        as pandas.DataFrame.

    Returns
    -------
    values
        An empty dict when ``site_code`` is not in `real_time_sites`;
        otherwise the converted values or the dataframe.
    """
    if site_code not in real_time_sites:
        log.info('%s is not in the list of LCRA real time salinity sites' %
                 site_code)
        return {}

    site_name = real_time_sites[site_code]
    data_url = 'http://waterquality.lcra.org/salinity.aspx?sNum=%s&name=%s' % (
        site_code, site_name)

    # The second table parsed from the page is the one that is used.
    tables = pd.read_html(data_url, header=0)
    data = tables[1]

    # Index by the parsed timestamp, then drop the raw timestamp column.
    data.index = data['Date - Time'].apply(util.convert_datetime)
    data = data.drop('Date - Time', axis=1)

    data = data.applymap(_nan_values)
    # Discard rows, then columns, that contain nothing but missing values.
    data = data.dropna(how='all', axis=0)
    data = data.dropna(how='all', axis=1)

    renamed = {column: _beautify_header(column) for column in data.columns}
    data = data.rename(columns=renamed).astype(float)

    if not as_dataframe:
        return util.dict_from_dataframe(data)
    return data
def get_recent_data(site_code, as_dataframe=False):
    """Fetch near real-time instantaneous water quality data for the LCRA
    bay sites.

    Parameters
    ----------
    site_code : str
        The bay site to fetch data for. see `real_time_sites`
    as_dataframe : bool
        This determines what format values are returned as. If ``False``
        (default), the dataframe is converted with
        ``util.dict_from_dataframe``. If ``True`` then values are returned
        as pandas.DataFrame.

    Returns
    -------
    list of values or dataframe (an empty dict if ``site_code`` is not one
    of `real_time_sites`).
    """
    known_site = site_code in real_time_sites
    if not known_site:
        log.info('%s is not in the list of LCRA real time salinity sites' %
                 site_code)
        return {}

    url_template = 'http://waterquality.lcra.org/salinity.aspx?sNum=%s&name=%s'
    data_url = url_template % (site_code, real_time_sites[site_code])

    # Use the second table that pandas parses out of the page.
    frame = pd.read_html(data_url, header=0)[1]

    timestamps = frame['Date - Time'].apply(util.convert_datetime)
    frame.index = timestamps
    frame = frame.drop('Date - Time', axis=1)

    frame = frame.applymap(_nan_values)
    # Remove all-missing rows first, then all-missing columns.
    for axis in (0, 1):
        frame = frame.dropna(how='all', axis=axis)

    pretty_names = {name: _beautify_header(name) for name in frame.columns}
    frame = frame.rename(columns=pretty_names)
    frame = frame.astype(float)

    return frame if as_dataframe else util.dict_from_dataframe(frame)
def get_values(wsdl_url, site_code, variable_code, start=None, end=None,
               suds_cache=("default",), timeout=None):
    """
    Retrieves site values from a WaterOneFlow service using a GetValues
    request.

    Parameters
    ----------
    wsdl_url : str
        URL of a service's web service definition language (WSDL)
        description. All WaterOneFlow services publish a WSDL description
        and this url is the entry point to the service.
    site_code : str
        Site code of the site you'd like to get values for. Site codes MUST
        contain the network and be of the form <network>:<site_code>, as is
        required by WaterOneFlow.
    variable_code : str
        Variable code of the variable you'd like to get values for. Variable
        codes MUST contain the network and be of the form
        <vocabulary>:<variable_code>, as is required by WaterOneFlow.
    start : ``None`` or datetime (see :ref:`dates-and-times`)
        Start of a date range for a query. If both start and end parameters
        are omitted, the entire time series available will be returned.
    end : ``None`` or datetime (see :ref:`dates-and-times`)
        End of a date range for a query. If both start and end parameters
        are omitted, the entire time series available will be returned.
    suds_cache : ``None`` or tuple
        SOAP local cache duration for WSDL description and client object.
        Pass a cache duration tuple like ('days', 3) to set a custom
        duration. Duration may be in months, weeks, days, hours, or seconds.
        If unspecified, the default duration (1 day) will be used.
        Use ``None`` to turn off caching.
    timeout : int or float
        suds SOAP URL open timeout (seconds).
        If unspecified, the suds default (90 seconds) will be used.

    Returns
    -------
    site_values : dict
        a python dict containing values

    Raises
    ------
    NotImplementedError
        If the service's WaterML version is neither '1.0' nor '1.1'.
    """
    suds_client = _get_client(wsdl_url, suds_cache, timeout)

    # Note from Emilio:
    #   Not clear if WOF servers really do handle time zones (time offsets
    #   or "Z" in the iso8601 datetime strings). In the past, I (Emilio)
    #   have passed naive strings to GetValues(). If a datetime object is
    #   passed to this ulmo function, the isodate code below will include it
    #   in the resulting iso8601 string; if not, no. Test effect of
    #   dt_isostr having a timezone code or offset, vs not having it (the
    #   latter, naive dt strings, is what I've been using all along).

    # the interpretation of start and end time zone is server-dependent
    start_dt_isostr = None
    end_dt_isostr = None
    if start is not None:
        start_datetime = util.convert_datetime(start)
        start_dt_isostr = isodate.datetime_isoformat(start_datetime)
    if end is not None:
        end_datetime = util.convert_datetime(end)
        end_dt_isostr = isodate.datetime_isoformat(end_datetime)

    waterml_version = _waterml_version(suds_client)
    response = suds_client.service.GetValues(
        site_code, variable_code, startDate=start_dt_isostr,
        endDate=end_dt_isostr)

    response_buffer = io.BytesIO(util.to_bytes(response))
    if waterml_version == '1.0':
        values = waterml.v1_0.parse_site_values(response_buffer)
    elif waterml_version == '1.1':
        values = waterml.v1_1.parse_site_values(response_buffer)
    else:
        # Without this branch ``values`` would be unbound below.
        raise NotImplementedError(
            "unsupported WaterML version: %r" % (waterml_version,))

    if variable_code is not None:
        # A single variable was requested: unwrap the first parsed entry.
        return list(values.values())[0]
    return values
def test_convert_datetime_from_date():
    """Converting a ``datetime.date`` must equal the paired datetime."""
    date_to_expected = (
        (datetime.date(2011, 12, 31),
         datetime.datetime(2011, 12, 31, 0, 0, 0)),
    )

    for source_date, expected in date_to_expected:
        result = util.convert_datetime(source_date)
        assert result == expected
def get_values(wsdl_url, site_code, variable_code=None, start=None, end=None):
    """
    Retrieves site values from a WaterOneFlow service using a GetValues
    request.

    Parameters
    ----------
    wsdl_url : str
        URL of a service's web service definition language (WSDL)
        description. All WaterOneFlow services publish a WSDL description
        and this url is the entry point to the service.
    site_code : str
        Site code of the site you'd like to get values for. Site codes MUST
        contain the network and be of the form <network>:<site_code>, as is
        required by WaterOneFlow.
    variable_code : str
        Variable code of the variable you'd like to get values for. Variable
        codes MUST contain the network and be of the form
        <vocabulary>:<variable_code>, as is required by WaterOneFlow. When
        ``None`` (default), the whole parsed dict is returned.
    start : ``None`` or datetime (see :ref:`dates-and-times`)
        Start of a date range for a query. If both start and end parameters
        are omitted, the entire time series available will be returned.
    end : ``None`` or datetime (see :ref:`dates-and-times`)
        End of a date range for a query. If both start and end parameters
        are omitted, the entire time series available will be returned.

    Returns
    -------
    site_values : dict
        a python dict containing values

    Raises
    ------
    NotImplementedError
        If the service's WaterML version is neither '1.0' nor '1.1'.
    """
    suds_client = suds.client.Client(wsdl_url)

    # Note from Emilio:
    #   Not clear if WOF servers really do handle time zones (time offsets
    #   or "Z" in the iso8601 datetime strings). In the past, I (Emilio)
    #   have passed naive strings to GetValues(). If a datetime object is
    #   passed to this ulmo function, the isodate code below will include it
    #   in the resulting iso8601 string; if not, no. Test effect of
    #   dt_isostr having a timezone code or offset, vs not having it (the
    #   latter, naive dt strings, is what I've been using all along).

    # the interpretation of start and end time zone is server-dependent
    start_dt_isostr = None
    end_dt_isostr = None
    if start is not None:
        start_datetime = util.convert_datetime(start)
        start_dt_isostr = isodate.datetime_isoformat(start_datetime)
    if end is not None:
        end_datetime = util.convert_datetime(end)
        end_dt_isostr = isodate.datetime_isoformat(end_datetime)

    waterml_version = _waterml_version(suds_client)
    response = suds_client.service.GetValues(
        site_code, variable_code, startDate=start_dt_isostr,
        endDate=end_dt_isostr)

    # Non-ASCII characters in the response are dropped before parsing.
    response_buffer = StringIO.StringIO(response.encode('ascii', 'ignore'))
    if waterml_version == '1.0':
        values = waterml.v1_0.parse_site_values(response_buffer)
    elif waterml_version == '1.1':
        values = waterml.v1_1.parse_site_values(response_buffer)
    else:
        # Without this branch ``values`` would be unbound below.
        raise NotImplementedError(
            "unsupported WaterML version: %r" % (waterml_version,))

    if variable_code is not None:
        # ``list(...)`` keeps this working where ``dict.values()`` returns a
        # non-indexable view rather than a list.
        return list(values.values())[0]
    return values
def get_values(wsdl_url, site_code, variable_code, start=None, end=None,
               suds_cache=("default",), timeout=None, user_cache=False):
    """
    Retrieves site values from a WaterOneFlow service using a GetValues
    request.

    Parameters
    ----------
    wsdl_url : str
        URL of a service's web service definition language (WSDL)
        description. All WaterOneFlow services publish a WSDL description
        and this url is the entry point to the service.
    site_code : str
        Site code of the site you'd like to get values for. Site codes MUST
        contain the network and be of the form <network>:<site_code>, as is
        required by WaterOneFlow.
    variable_code : str
        Variable code of the variable you'd like to get values for. Variable
        codes MUST contain the network and be of the form
        <vocabulary>:<variable_code>, as is required by WaterOneFlow.
    start : ``None`` or datetime (see :ref:`dates-and-times`)
        Start of the query datetime range. If omitted, data from the start
        of the time series to the ``end`` timestamp will be returned (but
        see caveat, in note below).
    end : ``None`` or datetime (see :ref:`dates-and-times`)
        End of the query datetime range. If omitted, data from the ``start``
        timestamp to end of the time series will be returned (but see
        caveat, in note below).
    suds_cache : ``None`` or tuple
        SOAP local cache duration for WSDL description and client object.
        Pass a cache duration tuple like ('days', 3) to set a custom
        duration. Duration may be in months, weeks, days, hours, or seconds.
        If unspecified, the default duration (1 day) will be used.
        Use ``None`` to turn off caching.
    timeout : int or float
        suds SOAP URL open timeout (seconds).
        If unspecified, the suds default (90 seconds) will be used.
    user_cache : bool
        If False (default), use the system temp location to store cache WSDL
        and other files. Use the default user ulmo directory if True.

    Returns
    -------
    site_values : dict
        a python dict containing values

    Raises
    ------
    NotImplementedError
        If the service's WaterML version is neither '1.0' nor '1.1'.

    Notes
    -----
    If both ``start`` and ``end`` parameters are omitted, the entire time
    series available will typically be returned. However, some service
    providers will return an error if either start or end are omitted; this
    is specially true for services hosted or redirected by CUAHSI via the
    CUAHSI HydroPortal, which have a 'WSDL' url using the domain
    https://hydroportal.cuahsi.org. For HydroPortal, a start datetime of
    '1753-01-01' has been known to return valid results while catching the
    oldest start times, though the response may be broken up into chunks
    ('paged').
    """
    suds_client = _get_client(wsdl_url, suds_cache, timeout, user_cache)

    # Note from Emilio:
    #   Not clear if WOF servers really do handle time zones (time offsets
    #   or "Z" in the iso8601 datetime strings). In the past, I (Emilio)
    #   have passed naive strings to GetValues(). If a datetime object is
    #   passed to this ulmo function, the isodate code below will include it
    #   in the resulting iso8601 string; if not, no. Test effect of
    #   dt_isostr having a timezone code or offset, vs not having it (the
    #   latter, naive dt strings, is what I've been using all along).

    # the interpretation of start and end time zone is server-dependent
    start_dt_isostr = None
    end_dt_isostr = None
    if start is not None:
        start_datetime = util.convert_datetime(start)
        start_dt_isostr = isodate.datetime_isoformat(start_datetime)
    if end is not None:
        end_datetime = util.convert_datetime(end)
        end_dt_isostr = isodate.datetime_isoformat(end_datetime)

    waterml_version = _waterml_version(suds_client)
    response = suds_client.service.GetValues(
        site_code, variable_code, startDate=start_dt_isostr,
        endDate=end_dt_isostr)

    response_buffer = io.BytesIO(util.to_bytes(response))
    if waterml_version == '1.0':
        values = waterml.v1_0.parse_site_values(response_buffer)
    elif waterml_version == '1.1':
        values = waterml.v1_1.parse_site_values(response_buffer)
    else:
        # Without this branch ``values`` would be unbound below.
        raise NotImplementedError(
            "unsupported WaterML version: %r" % (waterml_version,))

    if variable_code is not None:
        # A single variable was requested: unwrap the first parsed entry.
        return list(values.values())[0]
    return values
def get_values(wsdl_url, site_code, variable_code, start=None, end=None,
               suds_cache=("default",)):
    """
    Retrieves site values from a WaterOneFlow service using a GetValues
    request.

    Parameters
    ----------
    wsdl_url : str
        URL of a service's web service definition language (WSDL)
        description. All WaterOneFlow services publish a WSDL description
        and this url is the entry point to the service.
    site_code : str
        Site code of the site you'd like to get values for. Site codes MUST
        contain the network and be of the form <network>:<site_code>, as is
        required by WaterOneFlow.
    variable_code : str
        Variable code of the variable you'd like to get values for. Variable
        codes MUST contain the network and be of the form
        <vocabulary>:<variable_code>, as is required by WaterOneFlow.
    start : ``None`` or datetime (see :ref:`dates-and-times`)
        Start of a date range for a query. If both start and end parameters
        are omitted, the entire time series available will be returned.
    end : ``None`` or datetime (see :ref:`dates-and-times`)
        End of a date range for a query. If both start and end parameters
        are omitted, the entire time series available will be returned.
    suds_cache : ``None`` or tuple
        SOAP local cache duration for WSDL description and client object.
        Pass a cache duration tuple like ('days', 3) to set a custom
        duration. Duration may be in months, weeks, days, hours, or seconds.
        If unspecified, the default duration (1 day) will be used.
        Use ``None`` to turn off caching.

    Returns
    -------
    site_values : dict
        a python dict containing values

    Raises
    ------
    NotImplementedError
        If the service's WaterML version is neither '1.0' nor '1.1'.
    """
    suds_client = _get_client(wsdl_url, suds_cache)

    # Note from Emilio:
    #   Not clear if WOF servers really do handle time zones (time offsets
    #   or "Z" in the iso8601 datetime strings). In the past, I (Emilio)
    #   have passed naive strings to GetValues(). If a datetime object is
    #   passed to this ulmo function, the isodate code below will include it
    #   in the resulting iso8601 string; if not, no. Test effect of
    #   dt_isostr having a timezone code or offset, vs not having it (the
    #   latter, naive dt strings, is what I've been using all along).

    # the interpretation of start and end time zone is server-dependent
    start_dt_isostr = None
    end_dt_isostr = None
    if start is not None:
        start_datetime = util.convert_datetime(start)
        start_dt_isostr = isodate.datetime_isoformat(start_datetime)
    if end is not None:
        end_datetime = util.convert_datetime(end)
        end_dt_isostr = isodate.datetime_isoformat(end_datetime)

    waterml_version = _waterml_version(suds_client)
    response = suds_client.service.GetValues(
        site_code, variable_code, startDate=start_dt_isostr,
        endDate=end_dt_isostr)

    response_buffer = io.BytesIO(util.to_bytes(response))
    if waterml_version == '1.0':
        values = waterml.v1_0.parse_site_values(response_buffer)
    elif waterml_version == '1.1':
        values = waterml.v1_1.parse_site_values(response_buffer)
    else:
        # Without this branch ``values`` would be unbound below.
        raise NotImplementedError(
            "unsupported WaterML version: %r" % (waterml_version,))

    if variable_code is not None:
        # A single variable was requested: unwrap the first parsed entry.
        return list(values.values())[0]
    return values
def get_site_data(site_code, service=None, parameter_code=None, start=None,
                  end=None, period=None, modified_since=None,
                  input_file=None):
    """Fetches site data.

    Parameters
    ----------
    site_code : str
        The site code of the site you want to query data for.
    service : {``None``, 'instantaneous', 'iv', 'daily', 'dv'}
        The service to use, either "instantaneous", "daily", or ``None``
        (default). If set to ``None``, then both services are used. The
        abbreviations "iv" and "dv" can be used for "instantaneous" and
        "daily", respectively.
    parameter_code : str
        Parameter code(s) that will be passed as the parameterCd parameter.
    start : ``None`` or datetime (see :ref:`dates-and-times`)
        Start of a date range for a query. This parameter is mutually
        exclusive with period (you cannot use both).
    end : ``None`` or datetime (see :ref:`dates-and-times`)
        End of a date range for a query. This parameter is mutually
        exclusive with period (you cannot use both).
    period : {``None``, str, datetime.timedelta}
        Period of time to use for requesting data. This will be passed along
        as the period parameter. This can either be 'all' to signal that
        you'd like the entire period of record, or string in ISO 8601 period
        format (e.g. 'P1Y2M21D' for a period of one year, two months and 21
        days) or it can be a datetime.timedelta object representing the
        period of time. This parameter is mutually exclusive with start/end
        dates.
    modified_since : ``None`` or datetime.timedelta
        Passed along as the modifiedSince parameter.
    input_file: ``None``, file path or file object
        If ``None`` (default), then the NWIS web services will be queried,
        but if a file is passed then this file will be used instead of
        requesting data from the NWIS web services.

    Returns
    -------
    data_dict : dict
        a python dict with parameter codes mapped to value dicts

    Raises
    ------
    ValueError
        If ``period`` is combined with ``start`` and/or ``end``.
    """
    url_params = {'format': 'waterml',
                  'site': site_code}
    if parameter_code:
        url_params['parameterCd'] = parameter_code
    if modified_since:
        url_params['modifiedSince'] = isodate.duration_isoformat(
            modified_since)

    # ``period`` is exclusive with *either* end of a date range, not only
    # with a fully specified start/end pair.
    date_range_given = start is not None or end is not None
    if date_range_given and period is not None:
        raise ValueError("must use either a date range with start/end OR a "
                         "period, but not both")

    if period is not None:
        if isinstance(period, basestring):
            if period == 'all':
                # 'all' is translated into a fixed per-service start date
                # instead of being sent as a period parameter.
                if service in ('iv', 'instantaneous'):
                    start = datetime.datetime(2007, 10, 1)
                elif service in ('dv', 'daily'):
                    start = datetime.datetime(1851, 1, 1)
            else:
                url_params['period'] = period
        elif isinstance(period, datetime.timedelta):
            url_params['period'] = isodate.duration_isoformat(period)

    # The daily service gets date-only strings; otherwise full datetimes.
    if service in ('dv', 'daily'):
        datetime_formatter = isodate.date_isoformat
    else:
        datetime_formatter = isodate.datetime_isoformat
    if start is not None:
        start_datetime = util.convert_datetime(start)
        url_params['startDT'] = datetime_formatter(start_datetime)
    if end is not None:
        end_datetime = util.convert_datetime(end)
        url_params['endDT'] = datetime_formatter(end_datetime)

    if service is not None:
        values = _get_site_values(service, url_params, input_file=input_file)
    else:
        # No service chosen: query daily first, then merge the instantaneous
        # results on top of it.
        kwargs = dict(parameter_code=parameter_code, start=start, end=end,
                      period=period, modified_since=modified_since,
                      input_file=input_file)
        values = get_site_data(site_code, service='daily', **kwargs)
        values.update(
            get_site_data(site_code, service='instantaneous', **kwargs))

    return values