def test_filter_by_sql_threshold():
    """Test filter by sql statement.

    Filters the export DataFrame with a SQL WHERE clause on parameter and
    value: values below the threshold exist for this station, values above
    it do not.

    NOTE(review): this function was previously also named
    ``test_filter_by_sql`` and was therefore shadowed by a later duplicate
    definition in this module (pytest never collected it). Renamed so it
    actually runs.
    """
    # Readings below 1.5 exist in the fixture data.
    df = ExportMixin(df=df_data).filter_by_sql(
        "SELECT * FROM data WHERE parameter='temperature_air_max_200' AND value < 1.5"
    )
    assert not df.empty
    # No readings above 1.5 exist, so the result set must be empty.
    df = ExportMixin(df=df_data).filter_by_sql(
        "SELECT * FROM data WHERE parameter='temperature_air_max_200' AND value > 1.5"
    )
    assert df.empty
def test_filter_by_sql():
    """Filter the export DataFrame via SQL WHERE clauses on parameter and value."""
    # TODO: change this to a test of historical data
    below_threshold = ExportMixin(df=df_data).filter_by_sql(
        "SELECT * FROM data WHERE parameter='temperature_air_max_200' AND value < 1.5"
    )
    above_threshold = ExportMixin(df=df_data).filter_by_sql(
        "SELECT * FROM data WHERE parameter='temperature_air_max_200' AND value > 1.5"
    )
    # The fixture only contains readings below the threshold.
    assert not below_threshold.empty
    assert above_threshold.empty
def test_export_influxdb2_tidy():
    """Test export of DataFrame to influxdb v2."""
    Settings.tidy = True
    Settings.humanize = True
    Settings.si_units = False

    request = DwdObservationRequest(
        parameter=DwdObservationDataset.CLIMATE_SUMMARY,
        resolution=DwdObservationResolution.DAILY,
        period=DwdObservationPeriod.RECENT,
    ).filter_by_station_id(station_id=[1048])

    mock_client = mock.MagicMock()
    # Patch both the client and the Point factory; only the client connection
    # parameters are asserted afterwards.
    with mock.patch(
        "influxdb_client.InfluxDBClient",
        side_effect=[mock_client],
        create=True,
    ) as mock_connect, mock.patch(
        "influxdb_client.Point",
        create=True,
    ):
        observations = request.values.all().df
        ExportMixin(df=observations).to_target("influxdb2://orga:token@localhost/?database=dwd&table=weather")

    mock_connect.assert_called_once_with(url="http://localhost:8086", org="orga", token="token")
def test_export_influxdb_tidy():
    """Test export of a tidy DataFrame to influxdb."""
    request = DwdObservationRequest(
        parameter=DwdObservationDataset.CLIMATE_SUMMARY,
        resolution=DwdObservationResolution.DAILY,
        period=DwdObservationPeriod.RECENT,
        tidy=True,
        si_units=False,
    ).filter_by_station_id(station_id=[1048])

    mock_client = mock.MagicMock()
    with mock.patch(
        "influxdb.dataframe_client.DataFrameClient",
        side_effect=[mock_client],
        create=True,
    ) as mock_connect:
        observations = request.values.all().df
        ExportMixin(df=observations).to_target("influxdb://localhost/?database=dwd&table=weather")

    # Connection is opened against the requested database, which gets created.
    mock_connect.assert_called_once_with(database="dwd")
    mock_client.create_database.assert_called_once_with("dwd")
    # Exactly one write, carrying the tidy tag columns.
    mock_client.write_points.assert_called_once_with(
        dataframe=mock.ANY,
        measurement="weather",
        tag_columns=["station_id", "quality", "dataset", "parameter"],
        batch_size=50000,
    )
def test_export_cratedb():
    """Test export of DataFrame to cratedb."""
    Settings.tidy = True
    Settings.humanize = True
    Settings.si_units = False

    request = DwdObservationRequest(
        parameter=DwdObservationDataset.CLIMATE_SUMMARY,
        resolution=DwdObservationResolution.DAILY,
        period=DwdObservationPeriod.RECENT,
    ).filter_by_station_id(station_id=[1048])

    # CrateDB export goes through pandas' SQL writer; intercept it.
    with mock.patch("pandas.DataFrame.to_sql") as mock_to_sql:
        observations = request.values.all().df
        ExportMixin(df=observations).to_target("crate://localhost/?database=test&table=testdrive")

    mock_to_sql.assert_called_once_with(
        name="testdrive",
        con="crate://localhost",
        schema="test",
        if_exists="replace",
        index=False,
        chunksize=5000,
    )
def test_format_json_station_ids():
    """Test JSON export: the fixture station id must appear in the payload.

    NOTE(review): this function was previously also named
    ``test_format_json`` and was therefore shadowed by a later duplicate
    definition in this module (pytest never collected it). Renamed so it
    actually runs.
    """
    output = ExportMixin(df=df_data).to_json()
    response = json.loads(output)
    # Set comprehension replaces the former list(set([...])) round-trip.
    station_ids = {reading["station_id"] for reading in response}
    assert "01048" in station_ids
def test_format_json():
    """Test export of DataFrame to json."""
    payload = ExportMixin(df=df_data).to_json()
    records = json.loads(payload)
    assert "01048" in {record["station_id"] for record in records}
def test_format_csv():
    """Test export of DataFrame to CSV text."""
    output = ExportMixin(df=df_data).to_csv().strip()
    header = "station_id,dataset,parameter,date,value,quality"
    # NOTE(review): the exporter renders the timestamp with dashes in the
    # time part ("T00-00-00") — asserted as produced, not ISO-8601.
    first_row = "01048,climate_summary,temperature_air_max_200,2019-12-28T00-00-00,1.3,"
    assert header in output
    assert first_row in output
def test_to_dict():
    """Test export of DataFrame to dictionary records."""
    expected = [
        {
            "dataset": "climate_summary",
            "date": "2019-12-28T00:00:00+00:00",
            "parameter": "temperature_air_max_200",
            "quality": None,
            "station_id": "01048",
            "value": 1.3,
        },
    ]
    assert ExportMixin(df=df_data).to_dict() == expected
def test_export_unknown_target():
    """Exporting to an unsupported file extension must raise KeyError.

    NOTE(review): this function was previously also named
    ``test_export_unknown`` and was therefore shadowed by a later duplicate
    definition in this module (pytest never collected it). Renamed so it
    actually runs.
    """
    request = DwdObservationRequest(
        parameter=DwdObservationDataset.CLIMATE_SUMMARY,
        resolution=DwdObservationResolution.DAILY,
        period=DwdObservationPeriod.RECENT,
    ).filter_by_station_id(
        station_id=[1048],
    )
    df = request.values.all().df
    with pytest.raises(KeyError) as ex:
        ExportMixin(df=df).to_target("file:///test.foobar")
    ex.match("Unknown export file type")
def test_export_influxdb1_tidy():
    """Test export of DataFrame to influxdb v1."""
    Settings.tidy = True
    Settings.humanize = True
    Settings.si_units = False

    request = DwdObservationRequest(
        parameter=DwdObservationDataset.CLIMATE_SUMMARY,
        resolution=DwdObservationResolution.DAILY,
        period=DwdObservationPeriod.RECENT,
    ).filter_by_station_id(station_id=[1048])

    mock_client = mock.MagicMock()
    with mock.patch(
        "influxdb.InfluxDBClient",
        side_effect=[mock_client],
        create=True,
    ) as mock_connect:
        observations = request.values.all().df
        ExportMixin(df=observations).to_target("influxdb://localhost/?database=dwd&table=weather")

    # Connection parameters derived from the target URL.
    mock_connect.assert_called_once_with(
        host="localhost",
        port=8086,
        username=None,
        password=None,
        database="dwd",
        ssl=False,
    )
    mock_client.create_database.assert_called_once_with("dwd")
    mock_client.write_points.assert_called_once_with(
        points=mock.ANY,
        batch_size=50000,
    )

    # In tidy layout each point tags the parameter and carries a single field.
    first_point = mock_client.write_points.call_args.kwargs["points"][0]
    assert first_point["measurement"] == "weather"
    assert list(first_point["tags"]) == [
        "station_id",
        "quality",
        "dataset",
        "parameter",
    ]
    assert list(first_point["fields"]) == [
        "value",
    ]
def test_export_unknown():
    """Test export of DataFrame to unknown format."""
    Settings.tidy = True
    Settings.humanize = True
    Settings.si_units = True

    request = DwdObservationRequest(
        parameter=DwdObservationDataset.CLIMATE_SUMMARY,
        resolution=DwdObservationResolution.DAILY,
        period=DwdObservationPeriod.RECENT,
    ).filter_by_station_id(station_id=[1048])
    observations = request.values.all().df

    # An unrecognized file extension must be rejected with a KeyError.
    with pytest.raises(KeyError) as exc_info:
        ExportMixin(df=observations).to_target("file:///test.foobar")
    exc_info.match("Unknown export file type")
def test_export_duckdb():
    """Test export of DataFrame to duckdb."""
    Settings.tidy = True
    Settings.humanize = True
    Settings.si_units = False

    request = DwdObservationRequest(
        parameter=DwdObservationDataset.CLIMATE_SUMMARY,
        resolution=DwdObservationResolution.DAILY,
        period=DwdObservationPeriod.RECENT,
    ).filter_by_station_id(station_id=[1048])

    mock_connection = mock.MagicMock()
    with mock.patch(
        "duckdb.connect",
        side_effect=[mock_connection],
        create=True,
    ) as mock_connect:
        observations = request.values.all().df
        ExportMixin(df=observations).to_target("duckdb:///test.duckdb?table=testdrive")

    # Verify the full lifecycle: connect, register frame, write, close.
    mock_connect.assert_called_once_with(database="test.duckdb", read_only=False)
    mock_connection.register.assert_called_once()
    mock_connection.execute.assert_called()
    mock_connection.table.assert_called_once_with("testdrive")
    mock_connection.close.assert_called_once()
def test_export_zarr(tmpdir_factory):
    """Test export of DataFrame to zarr.

    Saves one year of tabular observations to a Zarr group in a temporary
    directory and reads it back to validate dimensions, column names and
    first/last values.
    """
    Settings.tidy = False
    Settings.humanize = True
    Settings.si_units = False

    # Request data.
    request = DwdObservationRequest(
        parameter=DwdObservationDataset.CLIMATE_SUMMARY,
        resolution=DwdObservationResolution.DAILY,
        start_date="2019",
        end_date="2020",
    ).filter_by_station_id(
        station_id=[1048],
    )
    df = request.values.all().df

    # Save to Zarr group.
    # Bug fix: the target URL was missing the path interpolation; the f-string
    # had no placeholder, so the export never went to the temporary file.
    filename = tmpdir_factory.mktemp("data").join("observations.zarr")
    ExportMixin(df=df).to_target(f"file://{filename}")

    # Read back Zarr group.
    group = zarr.open(str(filename), mode="r")

    # Validate dimensions.
    assert len(group) == 20
    assert len(group.index) == 366

    # Validate column names.
    assert set(group.keys()) == {
        "index",
        "station_id",
        "dataset",
        "date",
        "qn_3",
        "wind_gust_max",
        "wind_speed",
        "qn_4",
        "precipitation_height",
        "precipitation_form",
        "sunshine_duration",
        "snow_depth",
        "cloud_cover_total",
        "pressure_vapor",
        "pressure_air_site",
        "temperature_air_mean_200",
        "humidity",
        "temperature_air_max_200",
        "temperature_air_min_200",
        "temperature_air_min_005",
    }

    # Validate content.
    data = group
    assert data["date"][0] == np.datetime64(datetime.datetime(2019, 1, 1, 0, 0, tzinfo=datetime.timezone.utc))
    assert data["temperature_air_min_005"][0] == 1.5
    assert data["date"][-1] == np.datetime64(datetime.datetime(2020, 1, 1, 0, 0, tzinfo=datetime.timezone.utc))
    assert data["temperature_air_min_005"][-1] == -4.6

    # Zarr groups are directories, so remove the tree rather than unlinking.
    shutil.rmtree(filename)
def test_export_influxdb1_tabular():
    """Test export of DataFrame to influxdb v1."""
    Settings.tidy = False
    Settings.humanize = True
    Settings.si_units = False

    request = DwdObservationRequest(
        parameter=DwdObservationDataset.CLIMATE_SUMMARY,
        resolution=DwdObservationResolution.DAILY,
        period=DwdObservationPeriod.RECENT,
    ).filter_by_station_id(station_id=[1048])

    mock_client = mock.MagicMock()
    with mock.patch(
        "influxdb.InfluxDBClient",
        side_effect=[mock_client],
        create=True,
    ) as mock_connect:
        observations = request.values.all().df
        ExportMixin(df=observations).to_target("influxdb://localhost/?database=dwd&table=weather")

    # Connection parameters derived from the target URL.
    mock_connect.assert_called_once_with(
        host="localhost",
        port=8086,
        username=None,
        password=None,
        database="dwd",
        ssl=False,
    )
    mock_client.create_database.assert_called_once_with("dwd")
    mock_client.write_points.assert_called_once_with(
        points=mock.ANY,
        batch_size=50000,
    )

    # In tabular layout the quality columns become tags and every parameter
    # is its own field on the point.
    first_point = mock_client.write_points.call_args.kwargs["points"][0]
    assert first_point["measurement"] == "weather"
    assert list(first_point["tags"]) == [
        "station_id",
        "qn_3",
        "qn_4",
        "dataset",
    ]
    assert list(first_point["fields"]) == [
        "wind_gust_max",
        "wind_speed",
        "precipitation_height",
        "precipitation_form",
        "sunshine_duration",
        "snow_depth",
        "cloud_cover_total",
        "pressure_vapor",
        "pressure_air_site",
        "temperature_air_mean_200",
        "humidity",
        "temperature_air_max_200",
        "temperature_air_min_200",
        "temperature_air_min_005",
    ]
def test_export_sqlite(tmpdir_factory):
    """Test export of DataFrame to sqlite db.

    Writes one year of observations into a temporary SQLite database and
    reads the first and last rows back through the sqlite3 driver.
    """
    Settings.tidy = False
    Settings.humanize = True
    Settings.si_units = False

    request = DwdObservationRequest(
        parameter=DwdObservationDataset.CLIMATE_SUMMARY,
        resolution=DwdObservationResolution.DAILY,
        start_date="2019",
        end_date="2020",
    ).filter_by_station_id(
        station_id=[1048],
    )

    filename = tmpdir_factory.mktemp("data").join("observations.sqlite")

    df = request.values.all().df
    # Bug fix: the target URL was missing the path interpolation; the f-string
    # had no placeholder, so the export never went to the temporary file.
    ExportMixin(df=df).to_target(f"sqlite:///{filename}?table=testdrive")

    connection = sqlite3.connect(filename)
    cursor = connection.cursor()
    cursor.execute("SELECT * FROM testdrive")
    results = cursor.fetchall()
    cursor.close()
    connection.close()

    assert results[0] == (
        "01048",
        "climate_summary",
        "2019-01-01 00:00:00.000000",
        10.0,
        19.9,
        8.5,
        10.0,
        0.9,
        8.0,
        0.0,
        0.0,
        7.4,
        7.9,
        991.9,
        5.9,
        84.0,
        7.5,
        2.0,
        1.5,
    )

    assert results[-1] == (
        "01048",
        "climate_summary",
        "2020-01-01 00:00:00.000000",
        10,
        6.9,
        3.2,
        3,
        0.0,
        0,
        3.933,
        0,
        4.2,
        5.7,
        1005.11,
        2.4,
        79.0,
        5.6,
        -2.8,
        -4.6,
    )
def test_export_feather(tmpdir_factory):
    """Test export of DataFrame to feather.

    Saves one year of observations to a Feather file and reads it back with
    pyarrow to validate dimensions, column names and first/last values.
    """
    Settings.tidy = False
    Settings.humanize = True
    Settings.si_units = False

    # Request data
    request = DwdObservationRequest(
        parameter=DwdObservationDataset.CLIMATE_SUMMARY,
        resolution=DwdObservationResolution.DAILY,
        start_date="2019",
        end_date="2020",
    ).filter_by_station_id(
        station_id=[1048],
    )
    df = request.values.all().df

    # Save to Feather file.
    # Bug fix: the target URL was missing the path interpolation; the f-string
    # had no placeholder, so the export never went to the temporary file.
    filename = tmpdir_factory.mktemp("data").join("observations.feather")
    ExportMixin(df=df).to_target(f"file://{filename}")

    # Read back Feather file.
    table = feather.read_table(filename)

    # Validate dimensions.
    assert table.num_columns == 19
    assert table.num_rows == 366

    # Validate column names.
    assert table.column_names == [
        "station_id",
        "dataset",
        "date",
        "qn_3",
        "wind_gust_max",
        "wind_speed",
        "qn_4",
        "precipitation_height",
        "precipitation_form",
        "sunshine_duration",
        "snow_depth",
        "cloud_cover_total",
        "pressure_vapor",
        "pressure_air_site",
        "temperature_air_mean_200",
        "humidity",
        "temperature_air_max_200",
        "temperature_air_min_200",
        "temperature_air_min_005",
    ]

    # Validate content.
    data = table.to_pydict()

    assert data["date"][0] == datetime.datetime(2019, 1, 1, 0, 0, tzinfo=datetime.timezone.utc)
    assert data["temperature_air_min_005"][0] == 1.5

    assert data["date"][-1] == datetime.datetime(2020, 1, 1, 0, 0, tzinfo=datetime.timezone.utc)
    assert data["temperature_air_min_005"][-1] == -4.6

    os.unlink(filename)
def test_export_spreadsheet(tmpdir_factory):
    """Test export of DataFrame to spreadsheet.

    Saves one year of observations to an .xlsx file and validates the header
    row, the record count and the first/last data rows via openpyxl.
    """
    Settings.tidy = False
    Settings.humanize = True
    Settings.si_units = False

    # 1. Request data and save to .xlsx file.
    request = DwdObservationRequest(
        parameter=DwdObservationDataset.CLIMATE_SUMMARY,
        resolution=DwdObservationResolution.DAILY,
        start_date="2019",
        end_date="2020",
    ).filter_by_station_id(
        station_id=[1048],
    )
    df = request.values.all().df

    # Bug fix: the target URL was missing the path interpolation; the f-string
    # had no placeholder, so the export never went to the temporary file.
    filename = tmpdir_factory.mktemp("data").join("observations.xlsx")
    ExportMixin(df=df).to_target(f"file://{filename}")

    workbook = openpyxl.load_workbook(filename=filename)
    worksheet = workbook.active

    # 2. Validate some details of .xlsx file.

    # Validate header row.
    header = list(worksheet.iter_cols(min_row=1, max_row=1, values_only=True))
    assert header == [
        ("station_id",),
        ("dataset",),
        ("date",),
        ("qn_3",),
        ("wind_gust_max",),
        ("wind_speed",),
        ("qn_4",),
        ("precipitation_height",),
        ("precipitation_form",),
        ("sunshine_duration",),
        ("snow_depth",),
        ("cloud_cover_total",),
        ("pressure_vapor",),
        ("pressure_air_site",),
        ("temperature_air_mean_200",),
        ("humidity",),
        ("temperature_air_max_200",),
        ("temperature_air_min_200",),
        ("temperature_air_min_005",),
    ]

    # Validate number of records (366 data rows + 1 header row).
    assert worksheet.max_row == 367

    first_record = list(worksheet.iter_cols(min_row=2, max_row=2, values_only=True))
    assert first_record == [
        ("01048",),
        ("climate_summary",),
        ("2019-01-01T00:00:00+00:00",),
        (10,),
        (19.9,),
        (8.5,),
        (10,),
        (0.9,),
        (8,),
        (0,),
        (0,),
        (7.4,),
        (7.9,),
        (991.9,),
        (5.9,),
        (84,),
        (7.5,),
        (2,),
        (1.5,),
    ]

    last_record = list(worksheet.iter_cols(min_row=worksheet.max_row, max_row=worksheet.max_row, values_only=True))
    assert last_record == [
        ("01048",),
        ("climate_summary",),
        ("2020-01-01T00:00:00+00:00",),
        (10,),
        (6.9,),
        (3.2,),
        (3,),
        (0,),
        (0,),
        (3.933,),
        (0,),
        (4.2,),
        (5.7,),
        (1005.11,),
        (2.4,),
        (79,),
        (5.6,),
        (-2.8,),
        (-4.6,),
    ]

    os.unlink(filename)