import hydrofunctions as hf
import matplotlib.pyplot as plt
from datetime import datetime


def gageHeightIL():
    # Gage-height (00065) stations; commented-out IDs are excluded from the plot.
    stationNum = [
        '04087440',
        # '05536121',
        '05536123',
        '05536118',
        '05536137',
        '05536140',
        '05536890',
        # '05536290',
        # '05536995',
    ]
    stationName = [
        'Lake Michigan at Chicago Lock',
        # 'Chicago River at Chicago Lock',
        'Chicago River at Columbus Dr',
        'Chicago River at Grand Ave',
        'Chicago Sanitary and Ship Canal (CSSC) at Western Ave',
        'CSSC at Stickney, IL',
        'CSSC near Lemont, IL',
        # 'Little Calumet River at South Holland, IL',
        # 'CSSC at Romeoville, IL',
    ]
    df = hf.NWIS(stationNum, 'iv', period='P14D', parameterCd='00065').df()
    for i, n in enumerate(stationNum):
        # Drop each station's qualifier column, then give the data column a
        # readable name.
        df.drop(df.columns[2 * (len(stationNum) - i) - 1], axis=1, inplace=True)
        name = stationName[stationNum.index(n)]
        col = 'USGS ' + n + ': ' + name
        df.rename(columns={'USGS:' + n + ':00065:00000': col}, inplace=True)
        # Shift stations reported against other datums onto Chicago City Datum.
        if 'Lemont' in name or 'Romeoville' in name:
            df[col] -= 27.431
        if 'Little Calumet' in name:
            df[col] -= 4.48
    df.to_csv('./csv/gageHeight-IL-dataframe.csv', float_format='%.2f',
              na_rep='nan')
    df.plot(linewidth=.75, marker='.', markersize=1,
            figsize=(8, 7)).grid(color='grey', linestyle=':')
    plt.legend(edgecolor='black', facecolor='white', framealpha=1,
               markerscale=8, bbox_to_anchor=(.5, -.2), loc='upper center')
    plt.ylabel('Gage height, feet (Chicago City Datum)')
    plt.title('Updated ' + datetime.now().strftime('%m/%d/%Y %H:%M:%S') +
              ' US Central Time')
    plt.tight_layout()
    plt.savefig('./img/gageHeight-IL.png', dpi=150)
    plt.close()
import hydrofunctions as hf
import matplotlib.pyplot as plt
from datetime import datetime
from scipy.signal import savgol_filter


def gageHeightIN():
    stationNum = ['05536356', '04092750', '04092677']
    stationName = [
        'Grand Calumet River at Columbia Ave at Hammond, IN',
        'Indiana Harbor Canal at East Chicago, IN',
        'Grand Calumet River at Industrial Hwy at Gary, IN'
    ]
    df = hf.NWIS(stationNum, 'iv', period='P14D', parameterCd='00065').df()
    for i, n in enumerate(stationNum):
        # Drop each station's qualifier column, then give the data column a
        # readable name.
        df.drop(df.columns[2 * (len(stationNum) - i) - 1], axis=1, inplace=True)
        name = stationName[stationNum.index(n)]
        col = 'USGS ' + n + ': ' + name
        df.rename(columns={'USGS:' + n + ':00065:00000': col}, inplace=True)
        # Per-station datum shifts onto Chicago City Datum.
        if 'Hammond' in name:
            df[col] -= 5.13
        if 'Indiana Harbor' in name:
            df[col] -= 9.28
        if 'Gary' in name:
            df[col] += 0.55
    # Smooth the East Chicago record with a first-order Savitzky-Golay filter;
    # a 73-sample window of 5-minute readings spans roughly six hours.
    df['USGS 04092750: Indiana Harbor Canal at East Chicago, IN (6-hour Mean)'] = savgol_filter(
        df['USGS 04092750: Indiana Harbor Canal at East Chicago, IN'],
        73, 1, mode='nearest')
    df.to_csv('./csv/gageHeight-IN-dataframe.csv', float_format='%.2f',
              na_rep='nan')
    df.plot(y=[
        'USGS 04092750: Indiana Harbor Canal at East Chicago, IN',
        'USGS 04092750: Indiana Harbor Canal at East Chicago, IN (6-hour Mean)',
        'USGS 04092677: Grand Calumet River at Industrial Hwy at Gary, IN',
        'USGS 05536356: Grand Calumet River at Columbia Ave at Hammond, IN'
    ],
            linewidth=.75, marker='.', markersize=1, figsize=(8, 6),
            color=['lightgrey', 'tab:blue', 'tab:orange', 'tab:red']
            ).grid(color='grey', linestyle=':')
    plt.legend(edgecolor='black', facecolor='white', framealpha=1,
               markerscale=8, bbox_to_anchor=(.5, -.2), loc='upper center')
    plt.ylabel('Gage height, feet (Chicago City Datum)')
    plt.title('Updated ' + datetime.now().strftime('%m/%d/%Y %H:%M:%S') +
              ' US Central Time')
    plt.tight_layout()
    plt.savefig('./img/gageHeight-IN.png', dpi=150)
    plt.close()
from datetime import date, timedelta

import hydrofunctions as hf


def get_cfs_tomorrow(creek_water_gauge_num, location):
    yesterday = (date.today() - timedelta(days=1)).strftime('%Y-%m-%d')
    two_days_ago = (date.today() - timedelta(days=2)).strftime('%Y-%m-%d')

    herring = hf.NWIS(creek_water_gauge_num, 'dv', two_days_ago, yesterday)
    herring.get_data()
    # The daily-value request returns two values: index 0 is two days ago,
    # index 1 is yesterday.
    cfs_yesterday = float(
        herring.json()['value']['timeSeries'][0]['values'][0]['value'][1]['value'])
    cfs_two_days_ago = float(
        herring.json()['value']['timeSeries'][0]['values'][0]['value'][0]['value'])

    # Linear extrapolation: apply the day-over-day change twice to step from
    # yesterday to tomorrow. A positive diff means the flow is rising.
    diff = cfs_yesterday - cfs_two_days_ago
    cfs_tomorrow = cfs_yesterday + 2 * diff

    MAX_CFS = 300
    MIN_CFS = 100
    if MIN_CFS <= cfs_tomorrow <= MAX_CFS:
        condition = 'good'
    elif cfs_tomorrow < MIN_CFS:
        condition = 'too low'
    else:
        condition = 'too high'
    return {
        "location": location,
        "value": f"{round(cfs_tomorrow, 1)} CFS - {condition}",
        "type": 'Water Gauge'
    }
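# Minimal usage sketch for get_cfs_tomorrow(). The gauge number is borrowed
# from the Snake River example elsewhere in this file purely for illustration,
# and the location label is hypothetical; note the 100-300 CFS band above is
# tuned for a small creek.
reading = get_cfs_tomorrow('13011000', 'Snake River (illustrative)')
print(reading['location'], '->', reading['value'])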
import pandas as pd
import hydrofunctions as hf
from hydrofunctions.exceptions import HydroNoDataError


def get_data_from_sites(sites, service, parameter_code, start_date, end_date):
    """Fetch one parameter for each site, skipping sites with no data, and
    return the results merged column-wise into a single dataframe."""
    data_sites = []
    sites_with_param = []
    for site in sites:
        try:
            site_data = hf.NWIS(site, service, start_date, end_date,
                                parameterCd=parameter_code)
            site_data_df = site_data.get_data().df()
            data_sites.append(site_data_df)
            sites_with_param.append(site)
            print('got data for {}'.format(site))
        except HydroNoDataError:
            print('no data for {}'.format(site))
    data_from_sites_combined = pd.concat(data_sites, axis=1)
    return data_from_sites_combined
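# Hypothetical call: daily-mean discharge (00060) for two sites over one month.
# The first site number comes from the Snake River example in this file; the
# second is a placeholder to show the multi-site merge.
sites = ['13011000', '01646500']
discharge = get_data_from_sites(sites, 'dv', '00060', '2019-01-01', '2019-01-31')
print(discharge.head())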
import numpy as np
import hydrofunctions as hf
import matplotlib.pyplot as plt
from datetime import datetime


def dischargeIL():
    stationNum = [
        '05536890', '05536290', '05533600', '05537980',
        '05536085', '05536580', '05536500', '05536340'
    ]
    stationName = [
        'CSSC near Lemont, IL',
        'Little Calumet River at South Holland, IL',
        'Des Plaines River near Lemont, IL',
        'Des Plaines River at Route 53 at Joliet, IL',
        'North Branch Chicago River at N Pulaski Rd',
        'Stony Creek (west) near Worth, IL',
        'Tinley Creek near Palos Park, IL',
        'Midlothian Creek at Oak Forest, IL'
    ]
    df = hf.NWIS(stationNum, 'iv', period='P14D', parameterCd='00060').df()
    for i, n in enumerate(stationNum):
        # Drop each station's qualifier column, then give the data column a
        # readable name.
        df.drop(df.columns[2 * (len(stationNum) - i) - 1], axis=1, inplace=True)
        df.rename(columns={
            'USGS:' + n + ':00060:00000':
            'USGS ' + n + ': ' + stationName[stationNum.index(n)]
        }, inplace=True)
    # Mask negative discharges so the log-scale plot can render.
    df[df < 0] = np.nan
    df.to_csv('./csv/discharge-IL-dataframe.csv', float_format='%.2f',
              na_rep='nan')
    df.plot(linewidth=.75, marker='.', markersize=1, figsize=(8, 7),
            logy=True).grid(color='grey', linestyle=':')
    plt.legend(edgecolor='black', facecolor='white', framealpha=1,
               markerscale=8, bbox_to_anchor=(.5, -.2), loc='upper center')
    plt.ylabel('Discharge, cubic feet per second')
    plt.title('Updated ' + datetime.now().strftime('%m/%d/%Y %H:%M:%S') +
              ' US Central Time')
    plt.tight_layout()
    plt.savefig('./img/discharge-IL.png', dpi=150)
    plt.close()
import os

import hydrofunctions as hf


def get_station_daily_data(param, start, end, sid, freq='dv', out_dir=None):
    """Download one station's record and either write it to CSV (when an
    output directory is given) or return the dataframe."""
    try:
        nwis = hf.NWIS(sid, freq, start_date=start, end_date=end)
        df = nwis.df(param)
        if freq == 'iv':
            # Instantaneous values: tag the file with the start year.
            out_file = os.path.join(out_dir, '{}_{}.csv'.format(sid, start[:4]))
            df.to_csv(out_file)
        elif out_dir:
            out_file = os.path.join(out_dir, '{}.csv'.format(sid))
            df.to_csv(out_file)
        else:
            return df
    except ValueError as e:
        print(e)
    except hf.exceptions.HydroNoDataError:
        print('no data for {} to {}'.format(start, end))
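# Sketch of driving get_station_daily_data() in batch; the site list and the
# output directory are illustrative assumptions.
os.makedirs('./csv', exist_ok=True)
for sid in ['13011000', '01646500']:
    get_station_daily_data('00060', '2019-01-01', '2019-12-31', sid,
                           freq='dv', out_dir='./csv')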
import hydrofunctions as hf

snake = hf.NWIS('13011000', 'dv', period='P55D')
snake.get_data()
print(snake.df().head())
print(snake.start_date)
print(snake.end_date)
# coding: utf-8
# Forecast daily streamflow with a first-order exponential smoothing model.

# Import needed modules
import numpy as np   # vectors and matrices
import pandas as pd  # tables and data manipulation
import warnings      # silence library warnings

warnings.filterwarnings('ignore')

import hydrofunctions as hf
import matplotlib.pyplot as plt

# Pull instantaneous discharge for station 03335500 and keep the value and
# qualifier columns.
observation = hf.NWIS('03335500', 'iv', start_date='2019-01-01',
                      end_date='2019-06-30')
observation.get_data()
Timeseries = observation.df()
Timeseries.columns = ["discharge", "flag"]
Timeseries.head()
Timeseries.to_csv("Timeseries.csv", sep=',')

# Aggregate to daily means.
Daily = Timeseries.resample('D').mean()

# Check the discharge plot
get_ipython().run_line_magic('matplotlib', 'inline')  # notebook-only magic
time = pd.to_datetime(Daily.index)
plt.plot(time, Daily.discharge)
plt.xlabel('Time')
plt.ylabel('Discharge (cfs)')
plt.title('Discharge Hydrograph')
plt.show()
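# The header promises a first-order exponential smoothing forecast, but the
# snippet above stops at plotting. A minimal sketch of that step, assuming
# pandas' ewm() as the smoother and an arbitrary smoothing level alpha=0.2:
alpha = 0.2
smoothed = Daily.discharge.ewm(alpha=alpha, adjust=False).mean()
# With simple exponential smoothing, the one-step-ahead forecast is the last
# smoothed value.
print('Forecast for the next day:', smoothed.iloc[-1])
plt.plot(time, Daily.discharge, label='observed')
plt.plot(time, smoothed, label='smoothed (alpha=0.2)')
plt.legend()
plt.show()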
import numpy as np
import pandas as pd
import hydrofunctions as hf
import matplotlib.pyplot as plt
from tkinter import messagebox

# run() is a Tk callback; pvar, periodvar, beginvar, endvar, and unitvar are
# Tk variables defined with the rest of the GUI.


def run():
    if pvar.get():
        messagebox.showinfo(
            message='Going to ask USGS for ' + periodvar.get() +
            '-day data... May take some time...',
            icon='info')
    else:
        messagebox.showinfo(
            message='Going to ask USGS for data from ' + beginvar.get() +
            ' to ' + endvar.get() + '... May take some time...',
            icon='info')
    info = pd.read_csv('stationInfo.csv',
                       dtype={'stationVar': str, 'bcVar': str,
                              'shiftVar': np.float64})
    # Split stations into discharge (Q) and stage (H) boundary conditions.
    q = info[info.bcVar == 'Q']
    h = info[info.bcVar == 'H']
    print('=' * 30)
    print(q)
    print('=' * 30)
    print(h)
    print('=' * 30)
    print('Contacting USGS...')
    if pvar.get():
        dfq = hf.NWIS(q.stationVar.tolist(), 'iv',
                      period='P' + periodvar.get() + 'D',
                      parameterCd='00060').df()
        dfh = hf.NWIS(h.stationVar.tolist(), 'iv',
                      period='P' + periodvar.get() + 'D',
                      parameterCd='00065').df()
    else:
        dfq = hf.NWIS(q.stationVar.tolist(), 'iv', beginvar.get(),
                      endvar.get(), parameterCd='00060').df()
        dfh = hf.NWIS(h.stationVar.tolist(), 'iv', beginvar.get(),
                      endvar.get(), parameterCd='00065').df()
    # Drop the qualifier columns.
    for i, station in enumerate(q.stationVar.tolist()):
        dfq.drop(dfq.columns[2 * (len(q.stationVar) - i) - 1], axis=1,
                 inplace=True)
    for i, station in enumerate(h.stationVar.tolist()):
        dfh.drop(dfh.columns[2 * (len(h.stationVar) - i) - 1], axis=1,
                 inplace=True)
    # Optional conversion from US customary to SI units.
    if unitvar.get():
        dfq *= 0.3048 ** 3
        dfh *= 0.3048
    # Apply per-station datum shifts to the stage records.
    for i, shift in enumerate(h.shiftVar.tolist()):
        if shift != 0:
            dfh['USGS:' + h.stationVar.tolist()[i] + ':00065:00000'] += shift
    df = dfq.merge(dfh, left_index=True, right_index=True, how='outer')
    df.to_csv('usgs2telemac_raw_data.xls', sep='\t', float_format='%.6f',
              na_rep='nan')
    ax = dfh.interpolate(limit_direction='both').plot(
        linewidth=.75, marker='o', markersize=.75)
    ax.grid(color='grey', linestyle=':')
    if unitvar.get():
        plt.ylabel('Gage height, meters')
    else:
        plt.ylabel('Gage height, feet')
    plt.savefig('H.png', dpi=150)
    plt.close()
    ax = dfq.interpolate(limit_direction='both').plot(
        linewidth=.75, marker='o', markersize=.75)
    ax.grid(color='grey', linestyle=':')
    if unitvar.get():
        plt.ylabel('Discharge, cubic meters per second')
    else:
        plt.ylabel('Discharge, cubic feet per second')
    plt.savefig('Q.png', dpi=150)
    plt.close()
    # Re-index the merged series on elapsed seconds, as TELEMAC expects.
    t_in_seconds = np.zeros(len(df))
    for i in range(1, len(df)):
        dt = df.index.array[i] - df.index.array[i - 1]
        t_in_seconds[i] = t_in_seconds[i - 1] + dt.total_seconds()
    df.set_index(t_in_seconds, inplace=True)
    df.interpolate(limit_direction='both', inplace=True)
    # Build the TELEMAC liquid-boundary header.
    head = '#\nT\t' + '\t'.join(
        ['Q(' + str(index + 1) + ')' for index in q.index.values]) + '\t'
    head += '\t'.join(
        ['SL(' + str(index + 1) + ')' for index in h.index.values]) + '\n'
    if unitvar.get():
        head += 's\t' + 'm3/s\t' * len(q) + 'm\t' * len(h) + '\n'
    else:
        head += 's\t' + 'ft3/s\t' * len(q) + 'ft\t' * len(h) + '\n'
    with open('usgs2telemac_liq_boundary.xls', 'w') as f:
        f.write(head)
    df.to_csv('usgs2telemac_liq_boundary.xls', mode='a', sep='\t',
              header=False, float_format='%.6f', na_rep='nan')
    print('Done')
    messagebox.showinfo(
        message="Job done\n\n'usgs2telemac_raw_data.xls' and "
                "'usgs2telemac_liq_boundary.xls' have been written",
        icon='info')
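# Minimal wiring sketch for the GUI state run() expects; the variable names
# are taken from the function body, but the widget layout is an assumption.
import tkinter as tk

root = tk.Tk()
pvar = tk.BooleanVar(value=True)      # True: request a trailing period
periodvar = tk.StringVar(value='14')  # period length in days
beginvar = tk.StringVar(value='2021-01-01')
endvar = tk.StringVar(value='2021-01-15')
unitvar = tk.BooleanVar(value=False)  # True: convert output to SI units
tk.Button(root, text='Run', command=run).pack()
root.mainloop()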
import hydrofunctions as hf

herring = hf.NWIS('400052105144101', 'dv', period='P55D')
print(herring)
import datetime

import hydrofunctions as hf
from sqlalchemy import create_engine
from sqlalchemy_utils import database_exists, create_database

# username, site_loc (a dataframe of site locations), and site_no (a list of
# site numbers) are defined earlier in the script.
dbname = 'usgs_stream_db_log'
# 'postgresql://' is the canonical dialect name; the old 'postgres://' alias
# is rejected by SQLAlchemy 1.4+.
engine = create_engine('postgresql://%s@localhost/%s' % (username, dbname))
if not database_exists(engine.url):
    create_database(engine.url)
site_loc.to_sql('site_locations', engine, if_exists='replace')

##
# Pulling in data using hydrofunctions and saving to a PostgreSQL database.
##
start = '2000-01-01'
end = str(datetime.datetime.today().strftime('%Y-%m-%d'))  # today's date
for site in site_no:
    usgs_site = hf.NWIS(site, 'dv', start, end)
    usgs_site.get_data()
    usgs_dict = usgs_site.json()
    df = hf.extract_nwis_df(usgs_dict)
    # Rename the columns to "y" and "ds" for FBProphet later, and rename the
    # qualifier column to "flags" for better documentation.
    df.rename(index=str,
              columns={
                  'USGS:' + site + ':00060:00003': 'y',
                  'USGS:' + site + ':00060:00003_qualifiers': 'flags'
              },
              inplace=True)
    # The index is the datetime of each observation; copy it into a "ds"
    # column.
    df['ds'] = df.index[:]
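# A sketch of the FBProphet step the renaming above prepares for, applied to
# one site's df from the loop; the 30-day horizon is an arbitrary illustration,
# and the package is named prophet (not fbprophet) in newer releases.
from fbprophet import Prophet

m = Prophet()
m.fit(df[['ds', 'y']])
future = m.make_future_dataframe(periods=30)
forecast = m.predict(future)
print(forecast[['ds', 'yhat']].tail())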
import math

import numpy as np
import pandas as pd
import hydrofunctions as hf

# filter() is a method of a GeoEDF DischargeDataFilter plugin class; the
# self.* attributes and GeoEDFError come from the GeoEDF framework.


def filter(self):
    # First transform the comma-separated gage IDs into a list of strings.
    gage_ids = self.gages.rstrip().split(',')
    # Hydrofunctions cannot handle a large number of station IDs in one
    # request, so split into chunks of at most 100.
    num_split = math.ceil(len(gage_ids) / 100)
    gage_id_chunks = np.array_split(gage_ids, num_split)

    # Semantic checks on params.
    # Check (1): start and end date are dates and in the right order.
    try:
        start_date = pd.to_datetime(self.start, format='%m/%d/%Y')
        end_date = pd.to_datetime(self.end, format='%m/%d/%Y')
    except ValueError as e:
        raise GeoEDFError(
            'Invalid values provided for start or end date to DischargeDataFilter: %s' % e)
    except Exception:
        raise GeoEDFError(
            'Invalid values provided for start or end date to DischargeDataFilter')
    if start_date > end_date:
        raise GeoEDFError(
            'Start date cannot be later than end date in DischargeDataFilter')

    # Check (2): make sure the cutoff is an integer between 1 and 100.
    try:
        self.cutoff = int(self.cutoff)
        if self.cutoff < 1 or self.cutoff > 100:
            raise GeoEDFError(
                'Cutoff parameter in DischargeDataFilter must be an integer between 1 and 100')
    except Exception:
        raise GeoEDFError(
            'Cutoff parameter in DischargeDataFilter must be an integer between 1 and 100')

    # Next, query hydrofunctions for discharge data for the provided gages;
    # 00060 is the discharge parameter code.
    try:
        # Process each chunk separately and merge the resulting dataframes;
        # discharges holds the merged DF.
        discharges = None
        for gage_chunk in gage_id_chunks:
            chunk_data = hf.NWIS(list(gage_chunk), 'dv',
                                 start_date=start_date.strftime('%Y-%m-%d'),
                                 end_date=end_date.strftime('%Y-%m-%d'),
                                 parameterCd='00060')
            if discharges is None:
                discharges = chunk_data.df()
            else:
                # Simple outer merge on the date index.
                discharges = discharges.merge(chunk_data.df(), how='outer',
                                              left_index=True,
                                              right_index=True)

        # Get the statistics of the retrieved data; we are looking for the
        # count in order to filter by coverage percentage.
        stn_data = discharges.describe()
        # Maximum data available.
        max_count = stn_data.loc['count'].max()
        # Cutoff number of days.
        count_cutoff = (max_count * self.cutoff) / 100
        # Filter by availability.
        keep_stn = (stn_data.loc['count'] >= count_cutoff)
        valid_stns = keep_stn[keep_stn].index.to_list()
        # Clean up the station IDs, since the returned IDs have the form
        # USGS:####:param.
        filtered_ids = list(
            map(lambda stn_id: stn_id.split(':')[1], valid_stns))
        # If any remain, set the return value to a comma-separated list of
        # these IDs.
        if len(filtered_ids) > 0:
            self.values.append(','.join(filtered_ids))
    except Exception:
        raise GeoEDFError(
            'Error retrieving discharge data for gages in DischargeDataFilter')
import pandas as pd
import hydrofunctions as hf

# get() is a method of a GeoEDF NWISStatInput plugin class; the self.*
# attributes and GeoEDFError come from the GeoEDF framework.


def get(self):
    # Semantic checks on params.
    # Check (0): start and end year are integers.
    if not isinstance(self.start_yr, int) or not isinstance(self.end_yr, int):
        raise GeoEDFError('Start and end year parameters need to be integers')
    # Check (1): start and end year are in the right order.
    if self.start_yr > self.end_yr:
        raise GeoEDFError(
            'Start year cannot be later than end year in NWISStatInput')

    # Initialize the data array.
    var_data = []
    for year in range(self.start_yr, self.end_yr + 1):
        # Query hydrofunctions for parameter data for the provided state code.
        try:
            start_dt = '%d-01-01' % year
            end_dt = '%d-12-31' % year
            # Query data for all stations in the given state for this year.
            state_res = hf.NWIS(stateCd=self.state, start_date=start_dt,
                                end_date=end_dt, parameterCd=self.variable)
            # Compute statistics and extract the mean of each column.
            state_stats = state_res.df().mean().to_dict()
            # Keys are station IDs.
            for key, val in state_stats.items():
                # Only keep the mean-statistic (00003) columns.
                if key.endswith('00003'):
                    # Construct a new dict record.
                    data = dict()
                    # Construct the station ID.
                    stn_id = 'USGS:%s' % key.split(':')[1]
                    # Determine the lat-lon for the station from its metadata.
                    state_res_meta = state_res.meta
                    data['lat'] = None
                    data['lon'] = None
                    if stn_id in state_res_meta:
                        if 'siteLatLongSrs' in state_res_meta[stn_id]:
                            latlon = state_res_meta[stn_id]['siteLatLongSrs']
                            if 'latitude' in latlon:
                                data['lat'] = latlon['latitude']
                            if 'longitude' in latlon:
                                data['lon'] = latlon['longitude']
                    data['year'] = year
                    data['stn'] = stn_id
                    data['value'] = val
                    var_data.append(data)
        except hf.exceptions.HydroNoDataError:
            pass
        except hf.exceptions.HydroUserWarning:
            pass
        except Exception:
            print('Error retrieving data for variable %s in year %d for state %s in NWISStatInput'
                  % (self.variable, year, self.state))

    try:
        # Write out to a CSV file.
        var_df = pd.DataFrame(var_data)
        outfile = '%s/%s_%s.csv' % (self.target_path, self.state, self.variable)
        var_df.to_csv(outfile, index=False)
    except Exception:
        raise GeoEDFError('Error writing out data for variable %s for state %s in NWISStatInput'
                          % (self.variable, self.state))