def _nwis_parameters(site, service):
    """Return ``{site: [...]}`` mapping the site ID to the keys of the
    data dict that NWIS reports for *site* under the given *service*."""
    site_data = nwis.get_site_data(site, service=service)
    return {site: list(site_data)}
def get_flows(start_date=None, end_date=None, gauge=None):
    """Fetch USGS instantaneous streamflow (parameter code 00060) for the
    stations returned by ``gcgauges()``.

    Parameters
    ----------
    start_date : str, datetime-like, or None
        Beginning of the window.  Must not lie in the future.
    end_date : str, datetime-like, int, or None
        End of the window.  An ``int`` is interpreted as a day offset
        relative to ``start_date`` (negative counts backwards from it).
        ``None`` means a 10-day window.
    gauge : str or None
        If given, only this station ID is downloaded.

    Returns
    -------
    pandas.DataFrame or None
        One renamed value column per station, concatenated column-wise;
        ``None`` if no station matched ``gauge``.

    Raises
    ------
    ValueError
        If ``start_date`` lies in the future.

    NOTE(review): if ``start_date`` is None while ``end_date`` is an int
    offset, the arithmetic below fails — same as the original; confirm
    callers never pass that combination.
    """
    today = pd.to_datetime('now')
    # BUG FIX: 'period' was referenced in the nwis call below but never
    # defined in the original (NameError at runtime).
    period = None

    if start_date is None and end_date is None:
        # No date/range specified: default to the previous 10 days.
        end_date = today
        # BUG FIX: the original used days=-10, producing a *future* window.
        start_date = today - pd.Timedelta(days=10)
    else:
        if start_date is not None:
            start_date = pd.to_datetime(start_date)
            if start_date > today:
                # oops!
                raise ValueError("Invalid start_date in the future!")
        # Two cases for end_date here; it is either:
        #   offset (int)  - a number of days relative to start_date
        #   date string   - the actual date to stop
        if end_date is None:
            # BUG FIX: the original assigned an unused 'offset = 10' and
            # left end_date as None; apply the 10-day default here.
            end_date = start_date + pd.Timedelta(days=10)
        elif isinstance(end_date, int):
            if end_date < 0:
                # A negative offset counts backwards, so the window is
                # [start_date + end_date, start_date].
                # BUG FIX: the original arithmetic yielded end < start.
                start_date, end_date = (
                    start_date + pd.Timedelta(days=end_date), start_date)
            else:
                # BUG FIX: a positive int offset previously fell through
                # to pd.to_datetime(int), i.e. epoch nanoseconds.
                end_date = start_date + pd.Timedelta(days=end_date)
        else:
            end_date = pd.to_datetime(end_date)
        # Never request data past the present.
        end_date = min(end_date, today)

    stations = gcgauges()
    parameter = '00060'  # USGS parameter code for discharge (streamflow)
    flows = None
    for s in stations['ID']:
        # If a gauge was specified and this isn't it, skip it.
        if gauge is not None and s != gauge:
            continue
        # Get the instantaneous-value streamflow series for the gauge.
        d = nwis.get_site_data(s, service='iv', parameter_code=parameter,
                               start_date=start_date, end_date=end_date,
                               period=period)
        fd = pd.DataFrame(d['00060:00011']['values'], dtype=object)
        # Drop the qualifiers column.
        fd.drop('qualifiers', axis=1, inplace=True)
        # Strip the timezone suffix (e.g. '-05:00') from the timestamp
        # string and convert the result to a datetime.
        # NOTE: regex=True made explicit — newer pandas changed the default.
        fd['datetime'] = fd['datetime'].str.replace('-0.:00', '', regex=True)
        fd['datetime'] = pd.to_datetime(fd['datetime'])
        # Force the value to an int and rename the column after the station.
        fd[['value']] = fd[['value']].astype(int)
        fd = fd.rename(columns={'value': s})
        # Aggregate the gauge frame into the flows frame.
        flows = fd if flows is None else pd.concat([flows, fd], axis=1)
    return flows
def download(self, catalog_id, file_path, dataset, **kwargs):
    """Download one parameter/statistic series for an NWIS site and save
    it as a timeseries HDF5 file.

    Parameters
    ----------
    catalog_id : str
        NWIS site code to download.
    file_path : str
        Base directory; the final path is built under
        ``BASE_PATH/<service>/<dataset>/<dataset>.h5``.
    dataset : str or None
        Dataset name; defaults to ``'station-<catalog_id>'``.
    **kwargs
        Parameter overrides (``parameter``, ``start``, ``end``,
        ``period``) resolved through ``param.ParamOverrides``.

    Returns
    -------
    dict
        Metadata describing the written file (without ``service_id``).

    Raises
    ------
    ValueError
        If NWIS returns no data for the request.
    """
    p = param.ParamOverrides(self, kwargs)
    parameter = p.parameter
    start = p.start
    end = p.end
    period = p.period

    if dataset is None:
        dataset = 'station-' + catalog_id

    # An explicit start/end range overrides a relative period.
    if start and end:
        period = None

    pmap = self.parameter_map(invert=True)
    # pmap values look like 'code' or 'code:statistic'; pad with None so
    # the unpack works either way.
    parameter_code, statistic_code = (pmap[parameter].split(':') + [None])[:2]

    data = nwis.get_site_data(catalog_id,
                              parameter_code=parameter_code,
                              statistic_code=statistic_code,
                              start=start, end=end, period=period,
                              service=self.service_name)

    # dict contains only one key since only one parameter/statistic was
    # downloaded; this would need to be changed if multiple
    # parameter/stat were downloaded together
    if not data:
        raise ValueError('No Data Available')

    data = list(data.values())[0]

    # convert to dataframe and cleanup bad data
    df = pd.DataFrame(data['values'])
    if df.empty:
        raise ValueError('No Data Available')

    df = df.set_index('datetime')
    df.value = df.value.astype(float)

    if statistic_code in ['00001', '00002', '00003']:
        # daily statistic codes: use a daily PeriodIndex
        df.index = pd.to_datetime(df.index).to_period('D')
    else:
        df.index = pd.to_datetime(df.index)  # this is in UTC

    # -999999 is the NWIS sentinel for missing values.
    # BUG FIX: pd.np was deprecated in pandas 1.0 and removed in 2.0;
    # float('nan') needs no extra import.
    df[df.values == -999999] = float('nan')
    df.rename(columns={'value': parameter}, inplace=True)

    file_path = os.path.join(file_path, BASE_PATH, self.service_name,
                             dataset, '{0}.h5'.format(dataset))

    # 'values' is carried by the dataframe now; keep the rest as metadata.
    del data['values']

    metadata = {
        'name': dataset,
        'metadata': data,
        'file_path': file_path,
        'file_format': 'timeseries-hdf5',
        'datatype': 'timeseries',
        'parameter': parameter,
        'unit': data['variable']['units']['code'],
        'service_id': 'svc://usgs-nwis:{}/{}'.format(self.service_name,
                                                     catalog_id)
    }

    # save data to disk
    io = load_plugins('io', 'timeseries-hdf5')['timeseries-hdf5']
    io.write(file_path, df, metadata)
    del metadata['service_id']
    return metadata
import time # use ulmo library to download USGS streamflow data t = pd.date_range(start='1980-10-01', end='2017-09-30') df = pd.DataFrame(index=t) sites = ['11427200', '11427500', '11427700', '11427750', '11427760', '11427960', '11428400', '11428600', '11428800', '11433060', '11433080'] # STREAMFLOW for k in sites: print(k) data = ug.get_site_data(site_code=k, parameter_code='00060', service='dv', start='1980', end='2017') Q = pd.DataFrame(data['00060:00003']['values']) Q.index = pd.to_datetime(Q.datetime) df = pd.concat([df, Q.value.astype(float).rename('Q-'+k)], axis=1, copy=False) time.sleep(2) sites = ['11427400','11428700'] # STORAGE for k in sites: print(k) data = ug.get_site_data(site_code=k, parameter_code='00054', service='dv',