Python NWIS Examples, hydrofunctions.NWIS Python Examples

Example #1

0

Show file

def gageHeightIL():
    """Download, adjust, export and plot 14 days of Illinois gage heights.

    Requests parameter ``00065`` from the ``iv`` service for a fixed list of
    USGS stations, renames each data column to ``'USGS <id>: <name>'``,
    subtracts a fixed offset from selected stations, writes the table to
    ``./csv/gageHeight-IL-dataframe.csv`` and saves a line plot to
    ``./img/gageHeight-IL.png``.
    """
    # Parallel lists: site_names[k] is the display name of site_ids[k].
    # Stations 05536121, 05536290 and 05536995 are currently left out.
    site_ids = [
        '04087440',
        '05536123',
        '05536118',
        '05536137',
        '05536140',
        '05536890',
    ]
    site_names = [
        'Lake Michigan at Chicago Lock',
        'Chicago River at Columbus Dr',
        'Chicago River at Grand Ave',
        'Chicago Sanitary and Ship Canal (CSSC) at Western Ave',
        'CSSC at Stickney, IL',
        'CSSC near Lemont, IL',
    ]

    frame = hf.NWIS(site_ids, 'iv', period='P14D', parameterCd='00065').df()
    site_count = len(site_ids)

    for pos, (site, name) in enumerate(zip(site_ids, site_names)):
        # Each pass removes one odd-positioned column, working from the right
        # edge inwards (presumably the per-station qualifier columns -- confirm
        # against the hydrofunctions column layout).
        frame.drop(frame.columns[2 * (site_count - pos) - 1],
                   axis=1,
                   inplace=True)

        label = 'USGS ' + site + ': ' + name
        frame.rename(columns={'USGS:' + site + ':00065:00000': label},
                     inplace=True)

        # Fixed per-station offsets; the y-axis label below suggests they
        # bring readings onto Chicago City Datum.
        if 'Lemont' in name or 'Romeoville' in name:
            frame[label] -= 27.431
        if 'Little Calumet' in name:
            frame[label] -= 4.48

    frame.to_csv('./csv/gageHeight-IL-dataframe.csv',
                 float_format='%.2f',
                 na_rep='nan')

    axes = frame.plot(linewidth=.75, marker='.', markersize=1, figsize=(8, 7))
    axes.grid(color='grey', linestyle=':')
    # Legend sits below the axes so it does not cover the traces.
    plt.legend(edgecolor='black',
               facecolor='white',
               framealpha=1,
               markerscale=8,
               bbox_to_anchor=(.5, -.2),
               loc='upper center')
    plt.ylabel('Gage height, feet (Chicago City Datum)')
    stamp = datetime.now().strftime('%m/%d/%Y %H:%M:%S')
    plt.title('Updated ' + stamp + ' US Central Time')
    plt.tight_layout()
    plt.savefig('./img/gageHeight-IL.png', dpi=150)
    plt.close()

Example #2

0

Show file

def gageHeightIN():
    """Download, adjust, smooth, export and plot 14 days of Indiana gage heights.

    Requests parameter ``00065`` from the ``iv`` service for three USGS
    stations, renames each data column to ``'USGS <id>: <name>'``, applies a
    fixed offset per station, adds a smoothed copy of the Indiana Harbor
    Canal series, writes the table to ``./csv/gageHeight-IN-dataframe.csv``
    and saves a line plot to ``./img/gageHeight-IN.png``.
    """
    # Parallel lists: site_names[k] is the display name of site_ids[k].
    site_ids = ['05536356', '04092750', '04092677']
    site_names = [
        'Grand Calumet River at Columbia Ave at Hammond, IN',
        'Indiana Harbor Canal at East Chicago, IN',
        'Grand Calumet River at Industrial Hwy at Gary, IN',
    ]

    frame = hf.NWIS(site_ids, 'iv', period='P14D', parameterCd='00065').df()
    site_count = len(site_ids)

    for pos, (site, name) in enumerate(zip(site_ids, site_names)):
        # Each pass removes one odd-positioned column, working from the right
        # edge inwards (presumably the per-station qualifier columns -- confirm
        # against the hydrofunctions column layout).
        frame.drop(frame.columns[2 * (site_count - pos) - 1],
                   axis=1,
                   inplace=True)

        label = 'USGS ' + site + ': ' + name
        frame.rename(columns={'USGS:' + site + ':00065:00000': label},
                     inplace=True)

        # Fixed per-station offsets; the y-axis label below suggests they
        # bring readings onto Chicago City Datum.
        if 'Hammond' in name:
            frame[label] -= 5.13
        if 'Indiana Harbor' in name:
            frame[label] -= 9.28
        if 'Gary' in name:
            frame[label] += 0.55

    harbor = 'USGS 04092750: Indiana Harbor Canal at East Chicago, IN'
    harbor_mean = 'USGS 04092750: Indiana Harbor Canal at East Chicago, IN (6-hour Mean)'
    gary = 'USGS 04092677: Grand Calumet River at Industrial Hwy at Gary, IN'
    hammond = 'USGS 05536356: Grand Calumet River at Columbia Ave at Hammond, IN'

    # Degree-1 Savitzky-Golay filter over a 73-sample window; the column name
    # calls this a 6-hour mean (which assumes 5-minute sampling -- confirm).
    frame[harbor_mean] = savgol_filter(frame[harbor], 73, 1, mode='nearest')

    frame.to_csv('./csv/gageHeight-IN-dataframe.csv',
                 float_format='%.2f',
                 na_rep='nan')

    # The raw harbor series is drawn first in light grey, under its smoothed
    # copy and the two river series.
    axes = frame.plot(y=[harbor, harbor_mean, gary, hammond],
                      linewidth=.75,
                      marker='.',
                      markersize=1,
                      figsize=(8, 6),
                      color=['lightgrey', 'tab:blue', 'tab:orange', 'tab:red'])
    axes.grid(color='grey', linestyle=':')
    plt.legend(edgecolor='black',
               facecolor='white',
               framealpha=1,
               markerscale=8,
               bbox_to_anchor=(.5, -.2),
               loc='upper center')
    plt.ylabel('Gage height, feet (Chicago City Datum)')
    stamp = datetime.now().strftime('%m/%d/%Y %H:%M:%S')
    plt.title('Updated ' + stamp + ' US Central Time')
    plt.tight_layout()
    plt.savefig('./img/gageHeight-IN.png', dpi=150)
    plt.close()

Example #3

0

Show file

def get_cfs_tomorrow(creek_water_gauge_num, location):
    """Estimate tomorrow's discharge for a gauge and rate it.

    Fetches the ``dv`` values for the two days before today, takes the
    day-over-day change, and extrapolates linearly:
    ``cfs_tomorrow = cfs_yesterday + 2 * (cfs_yesterday - cfs_two_days_ago)``.

    Args:
        creek_water_gauge_num: Site identifier passed to ``hf.NWIS``.
        location: Label copied unchanged into the result.

    Returns:
        dict with keys ``"location"``, ``"value"`` (``"<cfs> CFS - <condition>"``)
        and ``"type"`` (always ``'Water Gauge'``).  ``condition`` is ``'good'``
        for 100 <= cfs <= 300, ``'too low'`` below 100 and ``'too high'``
        above 300.

    Raises:
        IndexError / KeyError: if the response does not contain two values.
    """
    # Read the clock once so both dates are derived from the same "today".
    today = date.today()
    end = (today - timedelta(days=1)).strftime('%Y-%m-%d')    # yesterday
    start = (today - timedelta(days=2)).strftime('%Y-%m-%d')  # two days ago

    herring = hf.NWIS(creek_water_gauge_num, 'dv', start, end)
    herring.get_data()

    # Decode the response once; index 0 is the earlier day, index 1 the later.
    readings = herring.json()['value']['timeSeries'][0]['values'][0]['value']
    cfs_two_days_ago = float(readings[0]['value'])
    cfs_yesterday = float(readings[1]['value'])

    # Positive diff means the flow is rising.
    diff = cfs_yesterday - cfs_two_days_ago

    MAX_CFS = 300
    MIN_CFS = 100
    cfs_tomorrow = cfs_yesterday + 2 * diff
    condition = 'none'
    # Bounds are inclusive: a forecast of exactly MIN_CFS or MAX_CFS used to
    # match no branch and was reported as 'none'.
    if MIN_CFS <= cfs_tomorrow <= MAX_CFS:
        condition = 'good'
    elif cfs_tomorrow < MIN_CFS:
        condition = 'too low'
    elif cfs_tomorrow > MAX_CFS:
        condition = 'too high'
    else:
        # Only reachable when the forecast is NaN (all comparisons are False).
        print(
            "\tThere's something wrong with your logic if you're seeing this.")

    return {
        "location": location,
        "value": f"{cfs_tomorrow} CFS - {condition}",
        "type": 'Water Gauge'
    }

Example #4

0

Show file

File: get_basin_data.py Project: jsadler2/preprocess_nwm_pgdl

def get_data_from_sites(sites, service, parameter_code, start_date, end_date):
    """Fetch one parameter for several sites and join the results column-wise.

    Each site is requested separately so that a site with no data for the
    parameter is reported and skipped instead of failing the whole batch.

    Args:
        sites: Iterable of site identifiers, each passed to ``hf.NWIS``.
        service: Service name forwarded to ``hf.NWIS``.
        parameter_code: Value forwarded as ``parameterCd``.
        start_date: Start date forwarded to ``hf.NWIS``.
        end_date: End date forwarded to ``hf.NWIS``.

    Returns:
        A DataFrame made by concatenating the per-site frames along
        ``axis=1``; an empty DataFrame when ``sites`` is empty or no site
        returned data.
    """
    data_sites = []
    for site in sites:
        try:
            site_data = hf.NWIS(site, service, start_date, end_date,
                                parameterCd=parameter_code)
            site_data_df = site_data.get_data().df()
        except HydroNoDataError:
            print("no data for {}".format(site))
            continue
        data_sites.append(site_data_df)
        print('got data for {}'.format(site))

    # pd.concat raises ValueError on an empty list, so return an empty frame.
    if not data_sites:
        return pd.DataFrame()
    return pd.concat(data_sites, axis=1)

Example #5

0

Show file

def dischargeIL():
    """Download, clean, export and plot 14 days of Illinois discharge data.

    Requests parameter ``00060`` from the ``iv`` service for a fixed list of
    USGS stations, renames each data column to ``'USGS <id>: <name>'``, blanks
    out negative values, writes the table to
    ``./csv/discharge-IL-dataframe.csv`` and saves a log-scale line plot to
    ``./img/discharge-IL.png``.
    """
    # Parallel lists: site_names[k] is the display name of site_ids[k].
    site_ids = [
        '05536890',
        '05536290',
        '05533600',
        '05537980',
        '05536085',
        '05536580',
        '05536500',
        '05536340',
    ]
    site_names = [
        'CSSC near Lemont, IL',
        'Little Calumet River at South Holland, IL',
        'Des Plaines River near Lemont, IL',
        'Des Plaines River at Route 53 at Joliet, IL',
        'North Branch Chicago River at N Pulaski Rd',
        'Stony Creek (west) near Worth, IL',
        'Tinley Creek near Palos Park, IL',
        'Midlothian Creek at Oak Forest, IL',
    ]

    frame = hf.NWIS(site_ids, 'iv', period='P14D', parameterCd='00060').df()
    site_count = len(site_ids)

    for pos, (site, name) in enumerate(zip(site_ids, site_names)):
        # Each pass removes one odd-positioned column, working from the right
        # edge inwards (presumably the per-station qualifier columns -- confirm
        # against the hydrofunctions column layout).
        frame.drop(frame.columns[2 * (site_count - pos) - 1],
                   axis=1,
                   inplace=True)
        frame.rename(
            columns={'USGS:' + site + ':00060:00000': 'USGS ' + site + ': ' + name},
            inplace=True)

    # Negative values cannot be drawn on the log axis below; mark them missing.
    frame[frame < 0] = np.nan

    frame.to_csv('./csv/discharge-IL-dataframe.csv',
                 float_format='%.2f',
                 na_rep='nan')

    axes = frame.plot(linewidth=.75,
                      marker='.',
                      markersize=1,
                      figsize=(8, 7),
                      logy=True)
    axes.grid(color='grey', linestyle=':')
    plt.legend(edgecolor='black',
               facecolor='white',
               framealpha=1,
               markerscale=8,
               bbox_to_anchor=(.5, -.2),
               loc='upper center')
    plt.ylabel('Discharge, cubic feet per second')
    stamp = datetime.now().strftime('%m/%d/%Y %H:%M:%S')
    plt.title('Updated ' + stamp + ' US Central Time')
    plt.tight_layout()
    plt.savefig('./img/discharge-IL.png', dpi=150)
    plt.close()

Example #6

0

Show file

File: hydrograph.py Project: dgketchum/MT_Rsense

def get_station_daily_data(param, start, end, sid, freq='dv', out_dir=None):
    """Fetch one parameter for a station and either save it as CSV or return it.

    Args:
        param: Passed to ``NWIS.df()`` to select what the frame contains.
        start: Start date string; its first four characters are used in the
            output file name when ``freq == 'iv'``.
        end: End date string.
        sid: Site identifier passed to ``hf.NWIS``.
        freq: Service name passed to ``hf.NWIS`` (default ``'dv'``).
        out_dir: Directory to write the CSV into.  When omitted, the
            DataFrame is returned instead of being written.

    Returns:
        The DataFrame when nothing is written to disk; ``None`` after a file
        was written or after a handled error (which is printed).
    """
    try:
        nwis = hf.NWIS(sid, freq, start_date=start, end_date=end)
        df = nwis.df(param)

        if freq == 'iv':
            # Without a target directory there is nowhere to write; return
            # the data instead of crashing in os.path.join(None, ...).
            if out_dir is None:
                return df
            out_file = os.path.join(out_dir, '{}_{}.csv'.format(sid, start[:4]))
            df.to_csv(out_file)

        elif out_dir:
            out_file = os.path.join(out_dir, '{}.csv'.format(sid))
            df.to_csv(out_file)

        else:
            return df

    except ValueError as e:
        print(e)
    except hf.exceptions.HydroNoDataError:
        print('no data for {} to {}'.format(start, end))

Example #7

0

Show file

File: streamflow.py Project: tanxuezhi/Aegis

import hydrofunctions as hf

# Request site 13011000 from the 'dv' service for period 'P55D', then show
# the first rows of the resulting table and the request's date bounds.
snake = hf.NWIS(
    '13011000',
    'dv',
    period='P55D',
)
snake.get_data()

print(snake.df().head())

# One value per line, start date first.
print(snake.start_date, snake.end_date, sep='\n')

Example #8

0

Show file

File: Data_Processing.py Project: SteveTsui1361/Github_Trail

# coding: utf-8
# This code is used to forecast daily streamflow with a first-order exponential smoothing model

# Import needed modules
import numpy as np  # vectors and matrices
import pandas as pd  # tables and data manipulations
import warnings  # There would be no warnings anymore
warnings.filterwarnings('ignore')
import hydrofunctions as hf
import matplotlib.pyplot as plt
# Download the 'iv' record for site 03335500 covering the first half of 2019.
observation = hf.NWIS('03335500',
                      'iv',
                      start_date='2019-01-01',
                      end_date='2019-06-30')
observation.get_data()
Timeseries = observation.df()
# Assumes the frame has exactly two columns: the data and its flag.
Timeseries.columns = ["discharge", "flag"]
Timeseries.to_csv("Timeseries.csv", sep=',')

# Average only the numeric discharge column: taking the mean of the string
# "flag" column raises a TypeError on pandas >= 2.0 (older versions silently
# dropped it, so the result is the same there).
Daily = Timeseries[["discharge"]].resample('D').mean()

# Check the discharge plot
try:
    # Inline plotting only exists when running under IPython/Jupyter.
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass
time = pd.to_datetime(Daily.index)
plt.plot(time, Daily.discharge)
plt.xlabel('Time')
plt.ylabel('Discharge (cfs)')
plt.title('Discharge Hydrograph')
plt.show()

Example #9

0

Show file

 def run():
     """Download USGS discharge and gage-height series and write boundary files.

     Reads the station list from ``stationInfo.csv`` and the request settings
     from the enclosing scope's GUI variables (``pvar``, ``periodvar``,
     ``beginvar``, ``endvar``, ``unitvar``).  Writes
     ``usgs2telemac_raw_data.xls`` and ``usgs2telemac_liq_boundary.xls``
     (both tab-separated text despite the extension) and the plots ``H.png``
     and ``Q.png`` in the current directory.
     """
     # Tell the user which kind of request is about to be made: a trailing
     # period of N days when pvar is set, otherwise an explicit date range.
     if pvar.get():
         messagebox.showinfo(
             message='Going to ask USGS for '+periodvar.get()+'-day data... May take some time...',
             icon='info')
     else:
         messagebox.showinfo(
             message='Going to ask USGS for data from '+beginvar.get()+' to '+endvar.get()+'... May take some time...',
             icon='info')
     # stationVar is read as text (so station ids keep any leading zeros),
     # bcVar selects the boundary type, shiftVar is a numeric offset.
     info = pd.read_csv('stationInfo.csv', dtype={'stationVar':str, 'bcVar':str, 'shiftVar':np.float64})
     # Rows marked 'Q' are requested as discharge, rows marked 'H' as gage height.
     q = info[info.bcVar == 'Q']
     h = info[info.bcVar == 'H']
     print('='*30)
     print(q)
     print('='*30)
     print(h)
     print('='*30)
     print('Contacting USGS...')
     # Parameter 00060 is requested for the Q stations and 00065 for the H stations.
     if pvar.get():
         dfq = hf.NWIS(q.stationVar.tolist(), 'iv', period='P'+periodvar.get()+'D', parameterCd='00060').df()
         dfh = hf.NWIS(h.stationVar.tolist(), 'iv', period='P'+periodvar.get()+'D', parameterCd='00065').df()
     else:
         dfq = hf.NWIS(q.stationVar.tolist(), 'iv', beginvar.get(), endvar.get(), parameterCd='00060').df()
         dfh = hf.NWIS(h.stationVar.tolist(), 'iv', beginvar.get(), endvar.get(), parameterCd='00065').df()
     # Remove one odd-positioned column per station, working from the right
     # edge inwards (presumably the qualifier column that follows each data
     # column -- TODO confirm against the hydrofunctions column layout).
     for i, station in enumerate(q.stationVar.tolist()):
         dfq.drop(dfq.columns[2*(len(q.stationVar)-i)-1], axis=1, inplace=True)
     for i, station in enumerate(h.stationVar.tolist()):
         dfh.drop(dfh.columns[2*(len(h.stationVar)-i)-1], axis=1, inplace=True)
     # Optional unit conversion: feet -> metres, cubic feet -> cubic metres
     # (matches the axis labels and header units chosen further down).
     if unitvar.get():
         dfq *= 0.3048**3
         dfh *= 0.3048
     # The per-station shift is added AFTER the unit conversion, so it is
     # applied as-is in whichever unit was selected.
     # NOTE(review): confirm shiftVar is meant to be in output units.
     for i, shift in enumerate(h.shiftVar.tolist()):
         if shift != 0:
             dfh['USGS:'+h.stationVar.tolist()[i]+':00065:00000'] += shift
     # Outer join on the index keeps rows present in either table.
     df = dfq.merge(dfh,left_index=True,right_index=True,how='outer')
     df.to_csv('usgs2telemac_raw_data.xls', sep='\t', float_format='%.6f', na_rep='nan')
     
     # Gage-height plot; gaps are interpolated for display only (dfh itself
     # is not modified).
     ax = dfh.interpolate(limit_direction='both').plot(linewidth=.75, marker='o', markersize=.75)
     ax.grid(color='grey', linestyle=':')
     
     if unitvar.get():
         plt.ylabel('Gage height, meter')
     else:
         plt.ylabel('Gage height, feet')
     plt.savefig('H.png',dpi=150)
     plt.close()
     
     # Discharge plot, same treatment.
     ax = dfq.interpolate(limit_direction='both').plot(linewidth=.75, marker='o', markersize=.75)
     ax.grid(color='grey', linestyle=':')
     
     if unitvar.get():
         plt.ylabel('Discharge, cubic meter per second')
     else:
         plt.ylabel('Discharge, cubic feet per second')
     plt.savefig('Q.png',dpi=150)
     plt.close()
     
     # Replace the index by elapsed seconds since the first row, accumulated
     # from the differences between consecutive index values.
     t_in_seconds = np.zeros(len(df))
     for i in range(1, len(df)):
         dt = df.index.array[i] - df.index.array[i-1]
         t_in_seconds[i] = t_in_seconds[i-1] + dt.total_seconds()
     df.set_index(t_in_seconds, inplace=True)
     # Fill the gaps in place before writing the boundary file.
     df.interpolate(limit_direction='both', inplace=True)
     # Header: a '#' line, a column-name line and a units line.  The numbers
     # in Q(n) / SL(n) are the station's row position in stationInfo.csv plus one.
     head = '#\nT\t'+'\t'.join(['Q('+str(index+1)+')' for index in q.index.values])+'\t'
     head += '\t'.join(['SL('+str(index+1)+')' for index in h.index.values])+'\n'
     if unitvar.get():
         head += 's\t'+'m3/s\t'*len(q)+'m\t'*len(h)+'\n'
     else:
         head += 's\t'+'ft3/s\t'*len(q)+'ft\t'*len(h)+'\n'
     # Write the hand-built header first, then append the data rows without
     # pandas' own header line.
     with open('usgs2telemac_liq_boundary.xls', 'w') as f: 
         f.write(head)
     df.to_csv('usgs2telemac_liq_boundary.xls', mode='a', sep='\t', header=False, float_format='%.6f', na_rep='nan')
     print('Done')
     messagebox.showinfo(
         message='job done\n\n\'usgs2telemac_raw_data.xls\' and \'usgs2telemac_liq_boundary.xls\' have been written',
         icon='info')

Example #10

0

Show file

File: testing.py Project: technopig/activity_picker

import hydrofunctions as hf
# Build an NWIS request for site 400052105144101 using the 'dv' service and
# period 'P55D' (presumably the most recent 55 days of daily values --
# confirm against the hydrofunctions documentation), then print the object.
herring = hf.NWIS('400052105144101', 'dv', period='P55D')
print(herring)

Example #11

0

Show file

dbname = 'usgs_stream_db_log'
# Use the 'postgresql' scheme: SQLAlchemy 1.4+ no longer accepts the old
# 'postgres' alias, while 'postgresql' works on every version.
engine = create_engine('postgresql://%s@localhost/%s' % (username, dbname))

# Create the database on first run.
if not database_exists(engine.url):
    create_database(engine.url)

# Overwrite the site-location table on every run.
site_loc.to_sql('site_locations', engine, if_exists='replace')

##
# Pulling in data using hydrofunctions and saving to PostgreSQL database.
##
start = '2000-01-01'
end = datetime.datetime.today().strftime('%Y-%m-%d')  # today's date

for site in site_no:
    usgs_site = hf.NWIS(site, 'dv', start, end)
    usgs_site.get_data()
    usgs_dict = usgs_site.json()
    df = hf.extract_nwis_df(usgs_dict)

    # FBProphet expects the value column to be called "y" and the timestamp
    # column "ds"; the qualifier column becomes "flags" for readability.
    # index=str also converts every index label to a string.
    df.rename(
        index=str,
        columns={
            "USGS:{}:00060:00003".format(site): "y",
            "USGS:{}:00060:00003_qualifiers".format(site): "flags",
        },
        inplace=True,
    )

    # The index holds the timestamp of each observation; expose it as "ds".
    df['ds'] = df.index[:]

Example #12

0

Show file

    def filter(self):
        """Keep the gages whose daily discharge record meets the coverage cutoff.

        Reads ``self.gages`` (comma separated gage IDs), ``self.start`` and
        ``self.end`` (``MM/DD/YYYY`` strings) and ``self.cutoff`` (a percentage
        from 1 to 100, stored back as an ``int``).  Downloads parameter
        ``00060`` from the ``dv`` service for the gages in chunks, counts the
        values available per series and keeps the series whose count is at
        least ``cutoff`` percent of the largest count.  When any remain, one
        comma separated string of their station IDs is appended to
        ``self.values``.

        Raises:
            GeoEDFError: if the dates cannot be parsed or are out of order,
                if the cutoff is not an integer between 1 and 100, or if
                retrieving or processing the discharge data fails.
        """
        # Turn the comma separated gage IDs into a list, tolerating whitespace
        # around each ID.
        gage_ids = [gage.strip() for gage in self.gages.split(',')]

        # HF cannot handle a large number of station IDs at once, so split the
        # list into chunks of at most 100.
        num_split = math.ceil(len(gage_ids) / 100)
        gage_id_chunks = np.array_split(gage_ids, num_split)

        # Check (1): start and end are valid dates and in the right order.
        try:
            start_date = pd.to_datetime(self.start, format='%m/%d/%Y')
            end_date = pd.to_datetime(self.end, format='%m/%d/%Y')
        except ValueError as e:
            raise GeoEDFError(
                'Invalid values provided for start or end date to DischargeDateFilter : %s'
                % e) from e
        except Exception as e:
            # Anything else (e.g. a TypeError for a non-string value).
            raise GeoEDFError(
                'Invalid values provided for start or end date to DischargeDataFilter'
            ) from e

        if start_date > end_date:
            raise GeoEDFError(
                'Start date cannot be later than end date in DischargeFilter')

        # Check (2): cutoff is an integer between 1 and 100 inclusive.
        cutoff_error = (
            'Cutoff parameter in DischargeDataFilter must be an integer between 1 and 100'
        )
        try:
            self.cutoff = int(self.cutoff)
        except (TypeError, ValueError, OverflowError) as e:
            raise GeoEDFError(cutoff_error) from e
        if not 1 <= self.cutoff <= 100:
            raise GeoEDFError(cutoff_error)

        # Query Hydrofunctions for the provided gages; 00060 is the discharge
        # parameter.
        try:
            # Process each chunk separately and outer-join the resulting
            # frames on their index.
            discharges = None
            for gage_chunk in gage_id_chunks:
                chunk_data = hf.NWIS(
                    list(gage_chunk),
                    'dv',
                    start_date=start_date.strftime('%Y-%m-%d'),
                    end_date=end_date.strftime('%Y-%m-%d'),
                    parameterCd='00060')
                if discharges is None:
                    discharges = chunk_data.df()
                else:
                    discharges = discharges.merge(chunk_data.df(),
                                                  how='outer',
                                                  left_index=True,
                                                  right_index=True)

            # Per-column statistics; only the 'count' row is used, to filter
            # by coverage percentage.
            stn_data = discharges.describe()

            # Largest number of values any one series has.
            max_count = stn_data.loc['count'].max()

            # Minimum number of values a series needs to be kept.
            count_cutoff = (max_count * self.cutoff) / 100

            keep_stn = (stn_data.loc['count'] >= count_cutoff)
            valid_stns = keep_stn[keep_stn].index.to_list()

            # Column names look like USGS:####:param; keep only the #### part.
            filtered_ids = [stn_id.split(':')[1] for stn_id in valid_stns]

            # If any remain, return them as one comma separated string.
            if filtered_ids:
                self.values.append(','.join(filtered_ids))
        except Exception as e:
            # Catch Exception (not a bare except) so Ctrl-C and SystemExit
            # still propagate, and chain the cause for debugging.
            raise GeoEDFError(
                "Error retrieving discharge data for gages in DischargeDataFilter"
            ) from e

Example #13

0

Show file

    def get(self):
        """Write per-station yearly means of one NWIS variable for a state to CSV.

        For every year from ``self.start_yr`` to ``self.end_yr`` inclusive,
        queries Hydrofunctions for ``self.variable`` at all stations in
        ``self.state``, takes the mean of each returned series over that year
        and records one row (``lat``, ``lon``, ``year``, ``stn``, ``value``)
        for every series whose column name ends in ``00003``.  Years that
        fail are skipped.  The rows are written to
        ``<self.target_path>/<state>_<variable>.csv``.

        Raises:
            GeoEDFError: if the year parameters are not integers or are out
                of order, or if the CSV file cannot be written.
        """
        # Check (0): start and end year are integers.
        if not isinstance(self.start_yr, int) or not isinstance(
                self.end_yr, int):
            raise GeoEDFError(
                'Start and end year parameters need to be integers')

        # Check (1): start and end year are in the right order.
        if self.start_yr > self.end_yr:
            raise GeoEDFError(
                'Start year cannot be later than end year in NWISStatInput')

        var_data = []

        for year in range(self.start_yr, self.end_yr + 1):
            try:
                # Query data for all stations in the given state for this year.
                state_res = hf.NWIS(stateCd=self.state,
                                    start_date='%d-01-01' % year,
                                    end_date='%d-12-31' % year,
                                    parameterCd=self.variable)
                # Mean of every numeric column; numeric_only keeps string
                # columns from raising on pandas >= 2.0.
                state_stats = state_res.df().mean(numeric_only=True).to_dict()
                state_res_meta = state_res.meta

                # Keys are column names of the form USGS:<station>:...
                for key, val in state_stats.items():
                    # Only keep series whose column name ends in 00003.
                    if not key.endswith('00003'):
                        continue

                    stn_id = 'USGS:%s' % key.split(':')[1]

                    # Look up lat/lon in the metadata; either stays None when
                    # any level of the lookup is missing.
                    site_meta = (state_res_meta[stn_id]
                                 if stn_id in state_res_meta else {})
                    lat_lon = (site_meta['siteLatLongSrs']
                               if 'siteLatLongSrs' in site_meta else {})

                    var_data.append({
                        'lat': lat_lon['latitude'] if 'latitude' in lat_lon else None,
                        'lon': lat_lon['longitude'] if 'longitude' in lat_lon else None,
                        'year': year,
                        'stn': stn_id,
                        'value': val,
                    })
            except hf.exceptions.HydroNoDataError:
                # No data for this year: skip it.
                continue
            except hf.exceptions.HydroUserWarning:
                continue
            except Exception:
                # Best effort: report the failed year and carry on.
                print(
                    "Error retrieving data for variable %s in year %d for state %s in NWISStatInput"
                    % (self.variable, year, self.state))

        try:
            # Write the collected rows out to a CSV file.
            var_df = pd.DataFrame(var_data)
            outfile = '%s/%s_%s.csv' % (self.target_path, self.state,
                                        self.variable)
            var_df.to_csv(outfile, index=False)
        except Exception as e:
            raise GeoEDFError(
                "Error writing out data for variable %s for state %s in NWISStatInput"
                % (self.variable, self.state)) from e