def test_hf_extract_nwis_df_parse_two_sites_two_params_iv_return_df(self):
    # Parse the two_sites_two_params_iv sample (defined outside this block)
    # without interpolation, then check the frame's type, its 93 x 8 shape,
    # its column names (order-insensitive) and that the index is unique and
    # sorted in increasing order.
    actual_df, _ = hf.extract_nwis_df(two_sites_two_params_iv, interpolate=False)

    # Check the type first so a wrong return type is reported as such rather
    # than as an attribute error on `.shape`.
    self.assertIs(
        type(actual_df), pd.core.frame.DataFrame, msg="Did not return a df"
    )

    actual_len, actual_width = actual_df.shape
    self.assertEqual(actual_len, 93, "Wrong length for dataframe")
    self.assertEqual(actual_width, 8, "Wrong width for dataframe")

    expected_columns = [
        "USGS:01541000:00060:00000",
        "USGS:01541000:00060:00000_qualifiers",
        "USGS:01541000:00065:00000",
        "USGS:01541000:00065:00000_qualifiers",
        "USGS:01541200:00060:00000",
        "USGS:01541200:00060:00000_qualifiers",
        "USGS:01541200:00065:00000",
        "USGS:01541200:00065:00000_qualifiers",
    ]
    actual_columns = actual_df.columns.values
    self.assertCountEqual(
        actual_columns, expected_columns, "column names don't match expected"
    )

    self.assertTrue(actual_df.index.is_unique, "index has repeated values.")
    # Index.is_monotonic was deprecated in pandas 1.5 and removed in 2.0;
    # is_monotonic_increasing performs the same check.
    self.assertTrue(
        actual_df.index.is_monotonic_increasing, "index is not monotonic."
    )
def test_hf_extract_nwis_df_accepts_response_obj(self):
    # extract_nwis_df is handed a fakeResponse instance (defined outside this
    # block) and must hand back a (DataFrame, dict) pair.
    frame, meta = hf.extract_nwis_df(fakeResponse(), interpolate=False)

    self.assertIsInstance(meta, dict, msg="Did not return a dict.") if False else None
    self.assertIsInstance(frame, pd.core.frame.DataFrame, msg="Did not return a df")
    self.assertIsInstance(meta, dict, msg="Did not return a dict.")
def test_hf_extract_nwis_df(self):
    # Fetch daily values ("dv") for site 01589440 over 2013-01-01..2013-01-05
    # and check that extract_nwis_df returns exactly a pandas DataFrame.
    # TODO: a response fixture is still needed so that this test does not
    # have to call get_nwis directly.
    # NOTE(review): other tests in this file unpack extract_nwis_df() as a
    # (df, dict) pair; confirm which return shape applies to this test.
    response = hf.get_nwis("01589440", "dv", "2013-01-01", "2013-01-05")
    result = hf.extract_nwis_df(response)
    result_type = type(result)
    self.assertIs(result_type, pd.core.frame.DataFrame, msg="Did not return a df")
def integration_test_save_read_parquet(self):
    # Round-trip a parsed frame and its metadata through a real parquet file
    # using hf.save_parquet / hf.read_parquet, then check both come back
    # unchanged.
    #
    # The file is written inside a temporary directory that is removed when
    # the `with` block exits, so the test no longer leaves
    # "test_filename_delete_me" behind in the working directory.
    # NOTE(review): the name lacks the "test" prefix, so default test
    # discovery presumably does not collect it - confirm this is intended.
    import os
    import tempfile

    expected_df, expected_meta = hf.extract_nwis_df(two_sites_two_params_iv)

    with tempfile.TemporaryDirectory() as scratch_dir:
        filename = os.path.join(scratch_dir, "test_filename_delete_me")
        hf.save_parquet(filename, expected_df, expected_meta)
        actual_df, actual_meta = hf.read_parquet(filename)

    assert_frame_equal(expected_df, actual_df)
    self.assertEqual(expected_meta, actual_meta, "The metadata dict has changed.")
def test_hf_extract_nwis_df_parse_two_sites_two_params_iv_return_df(self):
    # Parsing two_sites_two_params_iv (defined outside this block) without
    # interpolation must yield exactly a DataFrame and exactly a dict.
    parsed_frame, parsed_meta = hf.extract_nwis_df(
        two_sites_two_params_iv, interpolate=False
    )
    frame_type = type(parsed_frame)
    self.assertIs(frame_type, pd.core.frame.DataFrame, msg="Did not return a df")
    meta_type = type(parsed_meta)
    self.assertIs(meta_type, dict, msg="Did not return a dict.")
def test_charts_groupby_not_object_dtype(self):
    # Neither the parsed frame nor its group-by-weekday object should be
    # reported as object dtype by pandas.
    # Original author's note: for reasons not understood, Pandas 0.25.0 seems
    # to count DataFrameGroupBy as an object, which prevents calling
    # .quintile() on it (presumably .quantile() was meant).
    frame, _meta = hf.extract_nwis_df(test_json, interpolate=False)
    is_object = pd.api.types.is_object_dtype
    self.assertFalse(is_object(frame))

    by_weekday = frame.groupby(frame.index.weekday)
    self.assertIsInstance(by_weekday, pd.core.groupby.generic.DataFrameGroupBy)
    self.assertFalse(is_object(by_weekday))
def test_hf_extract_nwis_replaces_NWIS_noDataValue_with_npNan(self):
    # In the frame parsed from mult_flags (defined outside this block), the
    # discharge cell at 2019-01-28 16:00 -05:00 must be NaN.
    timestamp = "2019-01-28T16:00:00.000-05:00"
    column = "USGS:01542500:00060:00000"

    frame, _meta = hf.extract_nwis_df(mult_flags, interpolate=False)
    cell = frame.loc[timestamp, column]

    self.assertTrue(
        np.isnan(cell),
        "The NWIS no data value was not replaced with np.nan. ",
    )
def test_hf_extract_nwis_stations_df(self):
    # Request daily values for two sites, parse the response, and check that
    # the result is exactly a pandas DataFrame.
    # TODO: test should be the json for a multiple site request.
    # NOTE(review): other tests in this file unpack extract_nwis_df() as a
    # (df, dict) pair; confirm which return shape applies to this test.
    site_ids = ["01638500", "01646502"]
    response = hf.get_nwis(site_ids, "dv", "2013-01-01", "2013-01-05")
    result = hf.extract_nwis_df(response)

    # The property lookup is still performed (it must not raise), but its
    # value is not inspected by this test.
    hf.get_nwis_property(response, key='variableDescription')

    self.assertIs(type(result), pd.core.frame.DataFrame, msg="Did not return a df")
def test_charts_cycleplot_exists(self):
    # charts.cycleplot, given the first numeric column of the parsed
    # test_json frame, must return a matplotlib Figure and a sequence whose
    # first element is a matplotlib Axes.
    frame, _meta = hf.extract_nwis_df(test_json, interpolate=False)

    # Select first numeric column
    numeric_columns = frame.select_dtypes(include="number").columns
    first_numeric = frame.loc[:, numeric_columns[0]]

    figure, axes = charts.cycleplot(first_numeric)
    self.assertIsInstance(figure, matplotlib.figure.Figure)
    self.assertIsInstance(axes[0], matplotlib.axes.Axes)
def test_hf_extract_nwis_corrects_for_end_of_DST(self):
    # The frame parsed from endDST (defined outside this block) must have
    # 292 rows; the assertion message spells out the arithmetic.
    expected_rows = 292
    frame, _meta = hf.extract_nwis_df(endDST, interpolate=False)
    row_count, _column_count = frame.shape
    self.assertEqual(
        row_count,
        expected_rows,
        "Three days including the end of DST should have 3 * 24 * 4 = 288 observations, plus 4 = 292",
    )
def test_hf_extract_nwis_adds_missing_tags(self):
    # In the frame parsed from mult_flags, the qualifiers cell at
    # 2019-01-24 17:00 -05:00 must carry the "hf.missing" tag.
    timestamp = "2019-01-24 17:00:00-05:00"
    qualifier_column = "USGS:01542500:00060:00000_qualifiers"

    frame, _meta = hf.extract_nwis_df(mult_flags, interpolate=False)
    tag = frame.loc[timestamp, qualifier_column]

    self.assertEqual(
        tag,
        "hf.missing",
        "Missing records should be given 'hf.missing' _qualifier tags.",
    )
def test_hf_select_data_returns_data_cols(self):
    # After sorting the columns by name, hf.select_data must flag every
    # other column (positions 0, 2, 4, 6) as True and the rest as False.
    frame, _meta = hf.extract_nwis_df(two_sites_two_params_iv)
    ordered_columns = sorted(frame.columns)
    frame = frame.reindex(ordered_columns, axis=1)

    mask = hf.select_data(frame)

    expected_mask = [True, False] * 4
    self.assertListEqual(
        mask.tolist(),
        expected_mask,
        "select_data should return an array of which columns contain the data, not the qualifiers.",
    )
def test_hf_extract_nwis_adds_upsample_tags(self):
    # In the frame parsed from diff_freq (defined outside this block) without
    # interpolation, the qualifiers cell at 2018-06-01 00:15 -04:00 must
    # carry the "hf.upsampled" tag.
    actual_df, _ = hf.extract_nwis_df(diff_freq, interpolate=False)
    actual_upsample = actual_df.loc[
        "2018-06-01 00:15:00-04:00", "USGS:01570500:00060:00000_qualifiers"
    ]
    # The failure message now names the tag that is actually asserted
    # ('hf.upsampled'); it previously said 'hf.upsample'.
    self.assertEqual(
        actual_upsample,
        "hf.upsampled",
        "New records created by upsampling should be given 'hf.upsampled' _qualifier tags.",
    )
def test_hf_extract_nwis_bBox2_df(self):
    # Request daily values by bounding box (no site list), parse the
    # response, and check that the result is exactly a pandas DataFrame.
    # TODO: test should be the json for a multiple site request.
    # NOTE(review): other tests in this file unpack extract_nwis_df() as a
    # (df, dict) pair; confirm which return shape applies to this test.
    bounding_box = '-105.430,39.655,-104,39.863'
    response = hf.get_nwis(None, "dv", "2013-01-01", "2013-01-05", bBox=bounding_box)

    # The name lookup is still performed (it must not raise), but its value
    # is not inspected by this test.
    hf.get_nwis_property(response, key='name')

    result = hf.extract_nwis_df(response)
    self.assertIs(type(result), pd.core.frame.DataFrame, msg="Did not return a df")
def test_hf_extract_nwis_interpolates(self):
    # With interpolate=True, the discharge cell at 2018-06-01 00:15 -04:00 in
    # the frame parsed from diff_freq (defined outside this block) must hold
    # the value 42200.0.
    actual_df, _ = hf.extract_nwis_df(diff_freq, interpolate=True)
    actual_upsample_interpolate = actual_df.loc[
        "2018-06-01 00:15:00-04:00", "USGS:01570500:00060:00000"
    ]
    # The value is produced by floating-point arithmetic, so compare with a
    # tolerance (7 decimal places by default) instead of exact equality.
    # A NaN still fails this assertion.
    self.assertAlmostEqual(
        actual_upsample_interpolate,
        42200.0,
        msg="New records created by upsampling should have NaNs replaced with interpolated values.",
    )
def test_hf_extract_nwis_accepts_no_startdate_no_period_interpolate(self):
    # Parsing recent_only (defined outside this block) with interpolation
    # must give a 2 x 4 frame.
    # Per the original note: only the most recent data for two parameters,
    # plus qualifiers = 4 columns; 2 rows: different dates.
    frame, _meta = hf.extract_nwis_df(recent_only, interpolate=True)
    rows_and_columns = (2, 4)
    self.assertEqual(
        frame.shape,
        rows_and_columns,
        "The dataframe should have four columns and two rows.",
    )
def test_hf_extract_nwis_returns_comma_separated_qualifiers_2(self):
    # In the frame parsed from mult_flags, the qualifiers cell at
    # 2019-01-28 16:00 -05:00 must read "P,Ice".
    timestamp = "2019-01-28T16:00:00.000-05:00"
    qualifier_column = "USGS:01542500:00060:00000_qualifiers"

    frame, _meta = hf.extract_nwis_df(mult_flags, interpolate=False)
    flags = frame.loc[timestamp, qualifier_column]

    self.assertEqual(
        flags,
        "P,Ice",
        "The data qualifier flags were not parsed correctly.",
    )
def test_hf_read_parquet(self, mock_read):
    # hf.read_parquet must return the frame and metadata stored in the table
    # that the mocked reader hands back.
    # mock_read is supplied from outside this block (presumably by a patch
    # decorator on the parquet reader - confirm against the decorator).
    expected_df, expected_meta = hf.extract_nwis_df(two_sites_two_params_iv)

    # Build a pyarrow table whose schema metadata also carries the
    # JSON-encoded hydrofunctions metadata under b"hydrofunctions_meta".
    table = pa.Table.from_pandas(expected_df)
    schema_metadata = table.schema.metadata
    schema_metadata[b"hydrofunctions_meta"] = json.dumps(expected_meta).encode()
    mock_read.return_value = table.replace_schema_metadata(schema_metadata)

    actual_df, actual_meta = hf.read_parquet("fake_filename")

    assert_frame_equal(expected_df, actual_df)
    self.assertEqual(expected_meta, actual_meta, "The metadata dict has changed.")
def test_hf_extract_nwis_iv_gwstations_df(self):
    # Request "iv" data for two sites with parameterCd 72019, parse the
    # response, and check that the result is exactly a pandas DataFrame.
    # TODO: a response fixture is still needed to test this out.
    # NOTE(review): other tests in this file unpack extract_nwis_df() as a
    # (df, dict) pair; confirm which return shape applies to this test.
    site_ids = ["380616075380701", "394008077005601"]
    response = hf.get_nwis(
        site_ids, "iv", "2018-01-01", "2018-01-05", parameterCd='72019'
    )
    result = hf.extract_nwis_df(response)
    self.assertIs(type(result), pd.core.frame.DataFrame, msg="Did not return a df")
def test_charts_cycleplot_parts(self):
    # The first axes returned by charts.cycleplot must use linear x and y
    # scales and carry the expected y-axis label.
    # NOTE(review): the value returned by extract_nwis_df() is passed to
    # cycleplot as-is, while other tests in this file unpack a (df, dict)
    # pair first; confirm which return shape applies to this test.
    parsed = hf.extract_nwis_df(test_json)
    _figure, axes = charts.cycleplot(parsed)
    first_axes = axes[0]

    x_scale = first_axes.xaxis.get_scale()
    y_scale = first_axes.yaxis.get_scale()
    y_label = first_axes.yaxis.get_label_text()

    self.assertEqual(x_scale, 'linear')
    self.assertEqual(y_scale, 'linear')
    self.assertEqual(y_label, 'Stream Discharge (m³/s)')
def test_hf_extract_nwis_interpolates_and_adds_tags(self):
    # Ideally, every data value that was interpolated should have a tag
    # added to its qualifiers saying so. This checks one such cell: with
    # interpolate=True, the qualifiers at 2018-06-01 00:15 -04:00 must be
    # "hf.interpolated".
    timestamp = "2018-06-01 00:15:00-04:00"
    qualifier_column = "USGS:01570500:00060:00000_qualifiers"

    frame, _meta = hf.extract_nwis_df(diff_freq, interpolate=True)
    flag = frame.loc[timestamp, qualifier_column]

    self.assertEqual(
        flag,
        "hf.interpolated",
        "Interpolated values should be marked with a flag.",
    )
def test_charts_cycleplot_parts(self):
    # charts.cycleplot, called with legend_loc="center" and
    # title="test title", must produce first axes with linear x and y
    # scales, the expected y-axis label, a legend placed at the centre, and
    # a figure title equal to the requested one.
    expected_df, _ = hf.extract_nwis_df(test_json, interpolate=False)
    actual_fig, actual_ax = charts.cycleplot(
        expected_df, legend_loc="center", title="test title"
    )
    first_ax = actual_ax[0]

    self.assertEqual(first_ax.xaxis.get_scale(), "linear")
    self.assertEqual(first_ax.yaxis.get_scale(), "linear")
    self.assertEqual(first_ax.yaxis.get_label_text(), "Discharge (ft³/s)")

    # Assert that the legend exists BEFORE reading attributes from it, so a
    # missing legend is reported as a test failure rather than as an
    # AttributeError on None.
    actual_legend = first_ax.get_legend()
    self.assertIsNotNone(actual_legend)
    # '10' is the internal code for legend(loc='center'); _loc is a private
    # matplotlib attribute.
    self.assertEqual(actual_legend._loc, 10)

    # Unofficial title accessor (private attribute) - a little wonky.
    actual_title = actual_fig._suptitle.get_text()
    self.assertEqual(actual_title, "test title")
def test_hf_extract_nwis_can_deal_with_duplicated_records_that_have_been_altered_as_input(
    self,
):
    # Scenario from the original author: a scientist replaces an empty
    # record with new estimated data and forgets to discard the old data.
    # The test makes no assertions; it only requires that parsing
    # daily_dupe_altered (defined outside this block) does not raise.
    parsed = hf.extract_nwis_df(daily_dupe_altered, interpolate=False)
def test_charts_cycleplot_cycle_nonsense_raises_ValueError(self):
    # charts.cycleplot must raise ValueError when given the unknown cycle
    # name "nonsense".
    expected_df, _ = hf.extract_nwis_df(test_json, interpolate=False)
    with self.assertRaises(ValueError):
        # The return value is deliberately not unpacked: tuple unpacking
        # raises ValueError itself on a length mismatch, which could make
        # this test pass even if cycleplot accepted the bad argument.
        charts.cycleplot(expected_df, "nonsense")
def test_charts_cycleplot_cycle_diurnalhour(self):
    # charts.cycleplot with the "diurnal-hour" cycle must return a matplotlib
    # Figure and a sequence whose first element is a matplotlib Axes.
    frame, _meta = hf.extract_nwis_df(test_json, interpolate=False)
    plot_result = charts.cycleplot(frame, "diurnal-hour")
    figure, axes = plot_result
    self.assertIsInstance(figure, matplotlib.figure.Figure)
    self.assertIsInstance(axes[0], matplotlib.axes.Axes)
def test_charts_cycleplot_compare_month(self):
    # charts.cycleplot with compare="month" must return a matplotlib Figure
    # and a sequence whose first element is a matplotlib Axes.
    frame, _meta = hf.extract_nwis_df(test_json, interpolate=False)
    plot_result = charts.cycleplot(frame, compare="month")
    figure, axes = plot_result
    self.assertIsInstance(figure, matplotlib.figure.Figure)
    self.assertIsInstance(axes[0], matplotlib.axes.Axes)
def test_charts_cycleplot_exists(self):
    # charts.cycleplot must return a matplotlib Figure and a sequence whose
    # first element is a matplotlib Axes.
    # NOTE(review): the value returned by extract_nwis_df() is passed to
    # cycleplot as-is, while other tests in this file unpack a (df, dict)
    # pair first; confirm which return shape applies to this test.
    parsed = hf.extract_nwis_df(test_json)
    figure, axes = charts.cycleplot(parsed)
    self.assertIsInstance(figure, matplotlib.figure.Figure)
    self.assertIsInstance(axes[0], matplotlib.axes.Axes)
# Create the target database on first use.
if not database_exists(engine.url):
    create_database(engine.url)

# (Re)write the site_locations table; if_exists='replace' overwrites any
# existing table of that name.
site_loc.to_sql('site_locations', engine, if_exists='replace')

##
# Pulling in data using hydrofunctions and saving to PostgreSQL database.
##

start = '2000-01-01'
end = str(datetime.datetime.today().strftime('%Y-%m-%d'))  # Today's date as YYYY-MM-DD.

for site in site_no:
    # Request daily values ('dv') for this site between start and end.
    usgs_site = hf.NWIS(site, 'dv', start, end)
    usgs_site.get_data()
    usgs_dict = usgs_site.json()
    df = hf.extract_nwis_df(usgs_dict)
    # Need to rename columns to "y" and "ds" for FBProphet later.
    # The flag column is renamed to "flags" for better documentation.
    # index=str also maps every index label through str().
    df.rename(index=str, columns={
        "USGS:" + site + ":00060:00003": "y",
        "USGS:" + site + ":00060:00003_qualifiers": "flags"
    }, inplace=True)
    # The index is the datetime for each observation; copy it into a "ds" column.
    df['ds'] = df.index[:]
    # NOTE(review): the result of this split is not assigned to anything, so
    # df is left unchanged by this line - confirm what was intended.
    df['ds'].str.split(pat=' ', expand=True)
    # I add in all sites to the PostgreSQL database.
def test_hf_save_parquet(self, mock_write):
    # hf.save_parquet must invoke the patched writer when saving a parsed
    # frame and its metadata.
    # mock_write is supplied from outside this block (presumably by a patch
    # decorator on the parquet writer - confirm against the decorator).
    filename = "expected_filename"
    expected_df, expected_meta = hf.extract_nwis_df(two_sites_two_params_iv)
    hf.save_parquet(filename, expected_df, expected_meta)
    # The test previously ended in `pass` and so could never fail on a
    # save_parquet that silently wrote nothing.
    self.assertTrue(
        mock_write.called, "save_parquet did not call the patched writer."
    )
def test_hf_extract_nwis_df_parse_multiple_flags(self):
    # Parsing mult_flags (defined outside this block) without interpolation
    # must return a (DataFrame, dict) pair.
    parsed = hf.extract_nwis_df(mult_flags, interpolate=False)
    frame, meta = parsed
    self.assertIsInstance(frame, pd.core.frame.DataFrame, msg="Did not return a df")
    self.assertIsInstance(meta, dict, msg="Did not return a dict.")