import pytest
from dataretrieval.nwis import get_record


def test_get_record_validation():
    """Tests the validation parameters of the get_record method"""
    with pytest.raises(TypeError) as type_error:
        get_record(sites=['01491000'], service='not_a_service')
    assert 'Unrecognized service: not_a_service' == str(type_error.value)

    with pytest.raises(TypeError) as type_error:
        get_record(sites=['01491000'], service='stat')
    assert 'stat service not yet implemented' == str(type_error.value)
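
For contrast, a call that passes validation might look like the sketch below (the 'dv' daily-values service is real; the date range is illustrative):

df = get_record(sites=['01491000'], service='dv',
                start='2018-01-01', end='2018-01-31')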
Example #2
    def update(self, service=None, approved=False):
        """Update a service.

        Parameters
        ----------
        service : string
            Name of the service to update. If None, update all existing
            services.

        approved : boolean
            If True, re-download the record from its first entry (the
            approval status of older records may have changed); if False,
            only download records newer than the last one on file.

        TODO: set default approval to True once implemented
        """

        if not service:
            for service in self.services():
                self.update(service=service, approved=approved)

        elif service not in self._approved_services:
            raise TypeError("Unrecognized service")

        elif service == 'site':
            #site has only one record, so simply update the entire table
            updated = nwis.get_record(self.id(), service=service)
            self.put(service, updated)

        else:

            site = self.id()
            old_df = self.get(service)

            if approved:
                # re-download everything from the first record on file
                last_time = old_df.iloc[0].name.strftime('%Y-%m-%d')
            else:
                # only fetch records newer than the last one on file
                last_time = old_df.iloc[-1].name.strftime('%Y-%m-%d')

            new_df = nwis.get_record(site,
                                     start=last_time,
                                     end=None,
                                     service=service)

            if new_df is not None:
                # drop rows the new download re-delivers before appending
                overlap = new_df.index.intersection(old_df.index)
                old_df = old_df.drop(overlap)

                # pd.concat replaces DataFrame.append (removed in pandas 2.0);
                # assumes pandas is imported as pd at module level
                updated = pd.concat([old_df, new_df])
                self.put(service, updated)
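
A sketch of how this method might be called, assuming `station` is an instance of the surrounding class:

station.update(service='dv', approved=False)  # fetch only new 'dv' records
station.update()                              # update every stored service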
Example #3
import pandas as pd
import dataretrieval.nwis as nwis


def getstrm_wbs(station_id, end_date):
    """Get streamflow from
    https://waterdata.usgs.gov/nwis/.
    ---------------------------------
    This function downloads streamflow. It takes as input
    the station id numbers and the end date of the data.
    The dataset starts on 1989-01-01.
    ---------------------------------
    Parameters:
    station_id = list of station id strings
    end_date = date string as yyyy-mm-dd
    ----------------------------------
    Outputs:
    flow_data = dataframe with streamflow values and dates
    """
    start_date = '1989-01-01'
    flow_data = nwis.get_record(sites=station_id,
                                service='dv',
                                start=start_date,
                                end=end_date,
                                parameterCd='00060')
    flow_data.columns = ['flow', 'code', 'site_no']
    flow_data = flow_data.rename_axis("datetime")
    flow_data['datetime'] = pd.to_datetime(flow_data.index)
    return flow_data
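
A usage sketch (the site number is one that appears elsewhere in these examples; any valid USGS gage works):

flow = getstrm_wbs(['09506000'], '2020-10-31')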
Example #4
    def _check_user_input(site, start_date, end_date, data_type, nc_output):
        # check site
        try:
            site_info = nwis.get_record(sites=site, service='site')
        except Exception:
            raise ValueError("Incorrect USGS site number.")
        if site_info.empty:
            raise ValueError("Incorrect USGS site number.")

        # check data_type
        if data_type not in ['dv', 'iv']:
            raise ValueError(
                "Incorrect data type '{}': use 'dv' (daily values) or "
                "'iv' (instantaneous values).".format(data_type))

        # check time
        try:
            start_datetime = datetime.strptime(start_date, '%Y-%m-%d')
            end_datetime = datetime.strptime(end_date, '%Y-%m-%d')
        except Exception:
            raise ValueError("Incorrect date format, should be YYYY-MM-DD")

        # compare outside the try block so this error is not masked by
        # the date-format message above
        if start_datetime > end_datetime:
            raise ValueError('Start date must not be after end date.')

        # check nc_output
        if nc_output and nc_output[-3:] != '.nc':
            raise ValueError('Incorrect NetCDF file path.')
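
A sketch of how the check might be invoked, assuming the surrounding class exposes it as a static method (argument values are illustrative; a bad argument raises ValueError):

_check_user_input(site='03339000', start_date='2018-01-01',
                  end_date='2018-01-25', data_type='dv',
                  nc_output='output.nc')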
Example #5
def test_iv_service():
    """Unit test of instantaneous value service
    """
    start = START_DATE
    end = END_DATE
    service = 'iv'
    site = ['03339000', '05447500', '03346500']
    return get_record(site, start, end, service=service)
Example #6
def test_measurements_service():
    """Test measurement service
    """
    start = '2018-01-24'
    end = '2018-01-25'
    service = 'measurements'
    site = '03339000'
    df = get_record(site, start, end, service=service)
    return df
Example #7
import dataretrieval.nwis as nwis


def usgs_data(siteNumber, t1, t2, sel_int='HR', parameterCd='00060'):
    # hourly requests use the instantaneous-value service;
    # anything else falls back to daily values
    if sel_int == 'HR':
        service = 'iv'
    else:
        service = 'dv'
    data = nwis.get_record(sites=siteNumber, service=service,
                           start=t1, end=t2, parameterCd=parameterCd)
    site_info, md = nwis.get_info(sites=siteNumber)
    df = data.iloc[:, 0:1].copy()  # copy to avoid SettingWithCopyWarning
    cname = site_info['station_nm'].iloc[0].split(',')[0]
    df.columns = [cname.title() + ' (USGS)']
    df = df.tz_localize(None)
    df = df.resample('60min').mean()
    return df
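
Usage sketch (hypothetical dates; 'HR' selects hourly instantaneous values):

hourly = usgs_data('09506000', '2020-01-01', '2020-01-31', sel_int='HR')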
Example #8
    def download(self, service, start=None, end=None):
        """Download

        Parameters
        ----------
        service : string
        start : string
        end : string
        """
        group = self._group(service)

        try:
            df = nwis.get_record(self.id(),
                                 start=start,
                                 end=end,
                                 service=service)
            self.put(service, df)

        except NoSitesError:
            print('{} has no data on {}'.format(self.id(), service))
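
A sketch of a call, assuming `station` is an instance of the surrounding class:

station.download('dv', start='2019-12-01', end='2019-12-31')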
Example #9
    # excerpt begins inside a loop over the siteNumbers list
    continue
    query = """SELECT datenew, year, month, day, site_number FROM nwis.groundwater_daily_site_2
    WHERE site_number = '{}' ORDER BY year DESC, month DESC, day DESC LIMIT 1""".format(
        siteNumbers[i])
    data = pd.read_sql_query(query, cnx)
    if data.empty:
        continue
    lastdate = data.iloc[0, 0]
    lastdateobj = datetime.datetime.strptime(
        lastdate, '%m/%d/%Y') + datetime.timedelta(days=1)
    lastdatefinal = lastdateobj.strftime('%m/%d/%Y')
    lastdatelist = lastdatefinal.split("/")
    lastdatelist = [lastdatelist[2], lastdatelist[0], lastdatelist[1]]
    lastdatestr = "-".join(lastdatelist)
    df = nwis.get_record(sites=siteNumbers[i],
                         service='dv',
                         start=lastdatestr,
                         end=date.today())
    if df.empty:
        continue
    a_list = df.index.tolist()
    if len(a_list) > 0:
        # strip the time component from each timestamp, e.g.
        # '2020-01-01 00:00:00+00:00' -> '2020-01-01'
        for j in range(len(a_list)):
            a_list[j] = str(a_list[j]).replace(" 00:00:00+00:00", "")
        df.index = a_list
    # pd.concat replaces DataFrame.append (removed in pandas 2.0)
    newDF = pd.concat([newDF, df], ignore_index=False)
    newDF["date"] = newDF.index
    newDF[['year', 'month', 'day']] = newDF.date.str.split("-", expand=True)
    newDF["datecloser"] = newDF['month'].str.cat(newDF['day'], sep="/")
    newDF["datenew"] = newDF['datecloser'].str.cat(newDF['year'], sep="/")
finalDF = newDF[[
    "site_no", "72019_Mean", "72019_Mean_cd", "72019_Maximum",
Example #10
    return reg_model_predictions


# %%
# Step 3: Read in USGS streamflow data and create dataframe of avg weekly flow
# Used nwis.get_record function instead of saving a local file

# Change USGS_stop each week
station_id = "09506000"
USGS_start = "1989-01-01"
USGS_stop = "2020-10-24"

data_flow = nwis.get_record(sites=station_id,
                            service='dv',
                            start=USGS_start,
                            end=USGS_stop,
                            parameterCd='00060')

# Rename columns
data_flow.columns = ['flow', 'code', 'site_no']

# Collapse the tz-aware timestamps to plain date strings, then rebuild
# a naive datetime column from them
data_flow.index = data_flow.index.strftime('%Y-%m-%d')

data_flow['datetime'] = pd.to_datetime(data_flow.index)
data_flow['year'] = pd.DatetimeIndex(data_flow['datetime']).year
data_flow['month'] = pd.DatetimeIndex(data_flow['datetime']).month
data_flow['day'] = pd.DatetimeIndex(data_flow['datetime']).day
data_flow['dayofweek'] = pd.DatetimeIndex(data_flow['datetime']).dayofweek
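
The comment at the top of this step mentions average weekly flow, but the excerpt stops before that aggregation. A minimal sketch using the datetime column built above:

flow_weekly = data_flow.resample('W', on='datetime')['flow'].mean()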
Example #11
ax.plot(prc_mean.index, prc_mean["Precipitation"], color='red',
        linestyle='--', label='Observed')
ax.set(title="Precipitation since 2000", xlabel="Date",
       ylabel="Precipitation [mm/week]", yscale='log')
ax.legend()
fig.savefig("Historical_precip.png")  # Save figure

# %% Streamflow section
# Set the file name and path to where you have stored the data
# adjust path as necessary

station_id = '09506000'
start_date = '1989-01-01'
end_date = '2020-10-31'

data_flow_day = nwis.get_record(sites=station_id, service='dv',
                          start=start_date, end=end_date,
                          parameterCd='00060')
# Rename columns
data_flow_day.columns = ['flow', 'code', 'site_no']
# Collapse the tz-aware timestamps to plain date strings
data_flow_day.index = data_flow_day.index.strftime('%Y-%m-%d')
# %% Read the data into a pandas dataframe
# Expand dates to year month day
data_flow_day['datetime'] = pd.to_datetime(data_flow_day.index)
data_flow_day['year'] = pd.DatetimeIndex(data_flow_day['datetime']).year
data_flow_day['month'] = pd.DatetimeIndex(data_flow_day['datetime']).month
data_flow_day['day'] = pd.DatetimeIndex(data_flow_day['datetime']).day
data_flow_day['dayofweek'] = pd.DatetimeIndex(data_flow_day['datetime']).dayofweek

# %% AR model that you ended up building
Example #12
    def _get_nwis_data(site, start_date, end_date, data_type, nc_output):

        variable_info = {
            '00060': ['discharge', 'cubic feet per second'],
            '00065': ['gage height', 'feet'],
            '00010': ['water temperature', 'degree celsius'],
            '80154': ['Suspended sediment discharge', 'tons per day'],
            '80155': ['Total sediment discharge', 'tons per day'],
            '80225': ['Bedload sediment discharge', 'tons per day']
        }

        # get site info
        site_info = nwis.get_record(sites=site, service='site')

        # get time series data frame
        record_df = nwis.get_record(sites=site,
                                    service=data_type,
                                    start=start_date,
                                    end=end_date)
        filter_names = list(variable_info.keys()) + [
            var_name + '_Mean' for var_name in variable_info.keys()
        ]
        var_col_names = [
            col_name for col_name in record_df.columns
            if col_name in filter_names
        ]

        if record_df.empty or not var_col_names:
            raise ValueError(
                'Time series for the supported variables is not available '
                'for site {}.'.format(site))

        time_series_df = record_df[var_col_names].copy()
        # truncate column names like '00060_Mean' back to the 5-digit
        # parameter code so they match the variable_info keys
        time_series_df.columns = [
            col_name[:5] for col_name in time_series_df.columns
        ]

        # create xarray dataset
        xr_dataset = time_series_df.to_xarray()

        # assign datetime data to coordinate
        xr_dataset['datetime'] = time_series_df.index.values

        # add site metadata
        xr_dataset.attrs['site_name'] = site_info.station_nm.iloc[0]
        xr_dataset.attrs['site_code'] = site_info.site_no.iloc[0]
        xr_dataset.attrs['site_latitude'] = site_info.dec_lat_va.iloc[0]
        xr_dataset.attrs['site_longitude'] = site_info.dec_long_va.iloc[0]
        xr_dataset.attrs['site_altitude'] = site_info.alt_va.iloc[0]
        xr_dataset.attrs['site_coord_datum'] = site_info.dec_coord_datum_cd.iloc[0]

        # add variable metadata
        for var_name in time_series_df.columns:
            xr_dataset[var_name].attrs['variable_name'] = variable_info[
                var_name][0]
            xr_dataset[var_name].attrs['variable_unit'] = variable_info[
                var_name][1]
            xr_dataset[var_name].attrs['variable_data_type'] = (
                'daily value' if data_type == 'dv' else 'instantaneous value')

        # save output as a NetCDF file
        if nc_output:
            try:
                xr_dataset.to_netcdf(nc_output)
            except Exception:
                print('Failed to write the data in the NetCDF file.')

        return xr_dataset
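
A sketch of a call, assuming the surrounding class exposes this as a static method (arguments are illustrative):

ds = _get_nwis_data(site='03339000', start_date='2018-01-01',
                    end_date='2018-01-25', data_type='dv',
                    nc_output=None)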
Example #13
url = "https://waterdata.usgs.gov/nwis/dv?cb_00060=on&format=rdb&site_no=09506000" \
      "&referred_module=sw&period=&begin_date=1989-01-01&end_date=2020-10-19"

# Replace parts of my url with variables
site = '09506000'
start = '1990-01-01'
end = '2020-10-16'
url = "https://waterdata.usgs.gov/nwis/dv?cb_00060=on&format=rdb&site_no=" + site + \
      "&referred_module=sw&period=&begin_date=" + start + "&end_date=" + end
data2 = pd.read_table(url, skiprows=30, names=['agency_cd', 'site_no',
                                               'datetime', 'flow', 'code'],
                      parse_dates=['datetime'], index_col='datetime')

# %%
obs_day = nwis.get_record(sites=site, service='dv',
                          start=start, end=end,
                          parameterCd='00060')
obs_week = np.mean(obs_day['00060_Mean'])

# we can look at the package directly
# 1) Type conda env list in terminal to see where your environment lives
# 2) Navigate to that directory
# 3) From that directory go to lib/pythonX.Y/site-packages
# Open up that folder to see all the packages you have!
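# The same lookup can be done from Python (a small sketch):
import os
import dataretrieval
print(os.path.dirname(dataretrieval.__file__))  # directory inside site-packages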

# %% 
# Option 3: We can generate this URL and get the data using an API
# Technically we were already doing this, you just didn't know it
# API = Application Programming Interface  (Translation - a standard set of approaches/protocols
# for working with a given dataset in a predictable way --- rules for accessing data)
# Different datasets have their own APIs
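# A minimal sketch of making such a request explicitly with the `requests`
# package (assumes requests is installed; reuses the url built earlier):
import requests
response = requests.get(url)
print(response.text[:300])  # RDB text: '#' comment header, then tab-separated data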
# %%
# read in the forecast data and setup a dataframe
# filepath = os.path.join('..', 'Seasonal_Foercast_Dates.csv')
filepath = os.path.join('../weekly_results', 'weekly_observations.csv')
print(filepath)
obs_table = pd.read_csv(filepath, index_col='forecast_week')

# %%
# Read in the observations and get weekly averages
for i in range(1, week + 1):
    print(i)
    starti = obs_table.loc[i, 'start_date']
    endi = obs_table.loc[i, 'end_date']

    # read in the data from USGS
    # Read in the streamflow data and get the weekly average
    obs_day = nwis.get_record(sites=station_id,
                              service='dv',
                              start=starti,
                              end=endi,
                              parameterCd='00060')
    obs_table.loc[i, 'observed'] = np.round(np.mean(obs_day['00060_Mean']), 3)

# %%
# Write the updated observations out
filepath_out = os.path.join('..', 'weekly_results', 'weekly_observations.csv')
obs_table.to_csv(filepath_out, index_label='forecast_week')

# %%
Example #15
#--- https://github.com/USGS-python/dataretrieval
#--- https://stackoverflow.com/questions/16176996/keep-only-date-part-when-using-pandas-to-datetime
#--- https://stackoverflow.com/questions/50890989/pandas-changing-the-format-of-nan-values-when-saving-to-csv

#--- first import the functions for downloading data from NWIS
import dataretrieval.nwis as nwis
 
#--- specify the USGS site codes for which we want data.
with open("USGS - StationIDs.txt") as f:
    stationList = f.read().splitlines()

#--- specify the USGS parameter codes for which we want data.
parameterList = ['00020', '00021', '00025', '00030', '00032', '00035',
                 '00036', '00045', '00046', '00052', '46516', '46529',
                 '72192', '72194', '99772', '45587', '45588', '45589',
                 '45590']

# get basic info about the site
# df = nwis.get_record(sites=stationList, service='site')
# df.to_csv(r'C:\Users\Roberto\Documents\Climatología\USGS\export_dataframe_sites_info.csv', header=True)
# print(df)

df1 = nwis.get_record(stationList, service='dv', start='2019-12-01', end='2019-12-31')

#--- Use this if table data import wizard from MySQL Workbench will be used
df1.to_csv(r'C:\Users\Roberto\Documents\Climatología\USGS\export_dataframe.csv', date_format='%Y-%m-%d', header=True, na_rep='NULL')

#--- Use this one if LOAD DATA INFILE will be used in MySQL
# df1.to_csv(r'C:\Users\Roberto\Documents\Climatología\USGS\export_dataframe.csv', date_format='%Y-%m-%d', header=True, na_rep='\\N')

print(df1)