def test_export_influxdb_tidy():
    """Exporting tidy data to InfluxDB creates the database and writes one
    batched set of points with the expected tag columns."""
    observations = DWDObservationData(
        station_ids=[1048],
        parameters=DWDObservationParameterSet.CLIMATE_SUMMARY,
        resolution=DWDObservationResolution.DAILY,
        periods=DWDObservationPeriod.RECENT,
        tidy_data=True,
    )

    influx_client = mock.MagicMock()

    # Patch the InfluxDB client factory so no real connection is attempted.
    with mock.patch(
        "influxdb.dataframe_client.DataFrameClient",
        side_effect=[influx_client],
        create=True,
    ) as client_factory:
        frame = observations.all()
        frame.dwd.lower().io.export(
            "influxdb://localhost/?database=dwd&table=weather")

        client_factory.assert_called_once_with(database="dwd")
        influx_client.create_database.assert_called_once_with("dwd")
        influx_client.write_points.assert_called_once()
        influx_client.write_points.assert_called_with(
            dataframe=mock.ANY,
            measurement="weather",
            tag_columns=[
                "station_id", "quality", "parameter_set", "parameter"
            ],
            batch_size=50000,
        )
def test_coerce_field_types():
    """Meta and parameter columns are coerced to their canonical dtypes."""
    df = pd.DataFrame(
        {
            "STATION_ID": ["00001"],
            "QN": ["1"],
            "RS_IND_01": ["1"],
            "DATE": ["1970010100"],
            "END_OF_INTERVAL": ["1970010100:00"],
            "V_VV_I": ["P"],
        }
    )

    # Stub constructor: skips the real __init__ (which would trigger
    # network/file work) and sets only the attributes the coercion
    # helpers read.
    def __init__(self):
        self.tidy_data = False
        self.resolution = DWDObservationResolution.HOURLY

    with patch.object(DWDObservationData, "__init__", __init__):
        observations = DWDObservationData()
        df = observations._coerce_meta_fields(df)
        df = observations._coerce_parameter_types(df)

    expected_df = pd.DataFrame(
        {
            "STATION_ID": pd.Series(["00001"], dtype="category"),
            "QN": pd.Series([1], dtype=pd.Int64Dtype()),
            "RS_IND_01": pd.Series([1], dtype=pd.Int64Dtype()),
            "DATE": [pd.Timestamp("1970-01-01").tz_localize("UTC")],
            "END_OF_INTERVAL": [pd.Timestamp("1970-01-01")],
            "V_VV_I": pd.Series(["P"], dtype=pd.StringDtype()),
        }
    )

    assert_frame_equal(df, expected_df)
def test_request():
    """A plain request for recent daily climate data yields a non-empty frame."""
    observations = DWDObservationData(
        station_ids=[1048],
        parameters=DWDObservationParameterSet.CLIMATE_SUMMARY,
        resolution=DWDObservationResolution.DAILY,
        periods=DWDObservationPeriod.RECENT,
    )

    frame = observations.collect_safe()

    assert not frame.empty
def get_weatherdata(station_id, weather_parameters, resolution, periods, start_date, end_date):
    """
    Return a time series of DWD weather station observations for user-defined
    parameters as well as spatial and temporal resolution.

    :param station_id: list of station number(s), e.g. [4928]
    :param weather_parameters: list of parameters in the format
        DWDObservationParameterSet.PARAMETERNAME
    :param resolution: temporal resolution in the format
        DWDObservationResolution.RESOLUTION
    :param periods: list of observation periods in the format
        DWDObservationPeriod.PERIOD
    :param start_date: string indicating the start date in format "YYYY-MM-DD"
    :param end_date: string indicating the end date in format "YYYY-MM-DD"
    :return: pandas DataFrame with the selected observations

    Further information on parameter types:
    https://wetterdienst.readthedocs.io/_/downloads/en/latest/pdf/

    If the chosen resolution does not offer a requested parameter, an
    AttributeError is raised.
    """
    observations = DWDObservationData(
        station_ids=station_id,
        parameters=weather_parameters,
        resolution=resolution,
        periods=periods,
        start_date=start_date,
        end_date=end_date,
        tidy_data=True,
        humanize_column_names=True,
    ).collect_safe()
    return observations
def sql_example():
    """Fetch hourly air-temperature observations for one station and filter
    them with an in-memory SQL query, printing the result."""
    observations = DWDObservationData(
        station_ids=[1048],
        parameters=[DWDObservationParameterSet.TEMPERATURE_AIR],
        resolution=DWDObservationResolution.HOURLY,
        start_date="2019-01-01",
        end_date="2020-01-01",
        tidy_data=True,
        humanize_parameters=True,
    )

    sql = "SELECT * FROM data WHERE parameter='temperature_air_200' AND value < -7.0;"
    log.info(f"Invoking SQL query '{sql}'")

    frame = observations.all()
    frame = frame.dwd.lower().io.sql(sql)
    print(frame)
def test_export_crate():
    """Exporting to CrateDB delegates to DataFrame.to_sql with chunked,
    index-free multi-row inserts."""
    observations = DWDObservationData(
        station_ids=[1048],
        parameters=DWDObservationParameterSet.CLIMATE_SUMMARY,
        resolution=DWDObservationResolution.DAILY,
        periods=DWDObservationPeriod.RECENT,
    )

    # Intercept the pandas SQL writer so no database is needed.
    with mock.patch("pandas.DataFrame.to_sql", ) as mock_to_sql:
        frame = observations.collect_safe()
        frame.io.export("crate://localhost/?database=test&table=testdrive")

        mock_to_sql.assert_called_once_with(
            name="testdrive",
            con="crate://localhost/?database=test&table=testdrive",
            if_exists="replace",
            index=False,
            method="multi",
            chunksize=5000,
        )
def test_export_influxdb():
    """Exporting non-tidy data to InfluxDB creates the database and performs
    exactly one write of points."""
    observations = DWDObservationData(
        station_ids=[1048],
        parameters=DWDObservationParameterSet.CLIMATE_SUMMARY,
        resolution=DWDObservationResolution.DAILY,
        periods=DWDObservationPeriod.RECENT,
    )

    influx_client = mock.MagicMock()

    # Patch the InfluxDB client factory so no real connection is attempted.
    with mock.patch(
        "influxdb.dataframe_client.DataFrameClient",
        side_effect=[influx_client],
        create=True,
    ) as client_factory:
        frame = observations.collect_safe()
        frame.dwd.lower().io.export(
            "influxdb://localhost/?database=dwd&table=weather")

        client_factory.assert_called_once_with(database="dwd")
        influx_client.create_database.assert_called_once_with("dwd")
        influx_client.write_points.assert_called_once()
def test_export_duckdb():
    """Exporting to DuckDB opens the target file, registers the frame,
    materializes it into the requested table, and closes the connection."""
    observations = DWDObservationData(
        station_ids=[1048],
        parameters=DWDObservationParameterSet.CLIMATE_SUMMARY,
        resolution=DWDObservationResolution.DAILY,
        periods=DWDObservationPeriod.RECENT,
    )

    duck_connection = mock.MagicMock()

    # Patch duckdb.connect so no file is actually created.
    with mock.patch(
        "duckdb.connect",
        side_effect=[duck_connection],
        create=True,
    ) as mock_connect:
        frame = observations.collect_safe()
        frame.io.export("duckdb:///test.duckdb?table=testdrive")

        mock_connect.assert_called_once_with(database="test.duckdb", read_only=False)
        duck_connection.register.assert_called_once()
        duck_connection.execute.assert_called()
        duck_connection.table.assert_called_once_with("testdrive")
        duck_connection.close.assert_called_once()
def test_coerce_field_types_with_nans():
    """Coercion keeps missing values intact: NA/NaN survive in nullable
    integer and string columns alongside parsed values."""
    df = pd.DataFrame(
        {
            "QN": [pd.NA, np.nan, "1"],
            "RS_IND_01": [pd.NA, np.nan, "1"],
            "V_VV_I": [pd.NA, np.nan, "P"],
        }
    )

    expected_df = pd.DataFrame(
        {
            "QN": pd.Series([pd.NA, np.nan, 1], dtype=pd.Int64Dtype()),
            "RS_IND_01": pd.Series([pd.NA, np.nan, 1], dtype=pd.Int64Dtype()),
            "V_VV_I": pd.Series([pd.NA, np.nan, "P"], dtype=pd.StringDtype()),
        }
    )

    # Stub constructor: avoids the heavyweight real __init__ and sets only
    # the attribute the coercion helper reads.
    def __init__(self):
        self.tidy_data = False

    with mock.patch.object(DWDObservationData, "__init__", new=__init__):
        df = DWDObservationData()._coerce_parameter_types(df)

    assert_frame_equal(df, expected_df)
def dwd_readings(
    product: str,
    station: str = Query(default=None),
    parameter: str = Query(default=None),
    resolution: str = Query(default=None),
    period: str = Query(default=None),
    mosmix_type: str = Query(default=None, alias="mosmix-type"),
    date: str = Query(default=None),
    sql: str = Query(default=None),
):
    """
    Acquire data from DWD.

    # TODO: Obtain lat/lon distance/number information.

    :param product: string for product, either observations or mosmix
    :param station: Comma-separated list of station identifiers.
    :param parameter: Observation measure
    :param resolution: Frequency/granularity of measurement interval
    :param period: Recent or historical files
    :param mosmix_type: type of mosmix, either small or large
    :param date: Date or date range
    :param sql: SQL expression
    :return:
    """
    if product not in ["observations", "mosmix"]:
        # BUG FIX: this HTTPException was previously *returned*, which FastAPI
        # serializes as a 200 response body instead of emitting an HTTP 404.
        raise HTTPException(status_code=404, detail=f"product {product} not found")

    if station is None:
        raise HTTPException(
            status_code=400, detail="Query argument 'station' is required"
        )

    station_ids = map(str, read_list(station))

    if product == "observations":
        if parameter is None or resolution is None or period is None:
            raise HTTPException(
                status_code=400,
                detail="Query arguments 'parameter', 'resolution' "
                "and 'period' are required",
            )

        parameter = parse_enumeration_from_template(
            parameter, DWDObservationParameterSet
        )
        resolution = parse_enumeration_from_template(
            resolution, DWDObservationResolution
        )
        period = parse_enumeration_from_template(period, DWDObservationPeriod)

        # Data acquisition.
        readings = DWDObservationData(
            station_ids=station_ids,
            parameters=parameter,
            resolution=resolution,
            periods=period,
            tidy_data=True,
            humanize_parameters=True,
        )
    else:
        if mosmix_type is None:
            raise HTTPException(
                status_code=400, detail="Query argument 'mosmix_type' is required"
            )

        mosmix_type = parse_enumeration_from_template(mosmix_type, DWDMosmixType)

        readings = DWDMosmixData(station_ids=station_ids, mosmix_type=mosmix_type)

    # Postprocessing.
    df = readings.all()

    if date is not None:
        df = df.dwd.filter_by_date(date, resolution)

    df = df.dwd.lower()

    if sql is not None:
        df = df.io.sql(sql)

    data = json.loads(df.to_json(orient="records", date_format="iso"))

    return make_json_response(data)
def dwd_readings(
    station: str = Query(default=None),
    parameter: str = Query(default=None),
    resolution: str = Query(default=None),
    period: str = Query(default=None),
    date: str = Query(default=None),
    sql: str = Query(default=None),
):
    """
    Acquire data from DWD.

    # TODO: Obtain lat/lon distance/number information.

    :param station: Comma-separated list of station identifiers.
    :param parameter: Observation measure
    :param resolution: Frequency/granularity of measurement interval
    :param period: Recent or historical files
    :param date: Date or date range
    :param sql: SQL expression
    :return:
    """
    # Validate mandatory query arguments up front.
    if station is None:
        raise HTTPException(
            status_code=400, detail="Query argument 'station' is required"
        )
    if parameter is None or resolution is None or period is None:
        raise HTTPException(
            status_code=400,
            detail="Query arguments 'parameter', 'resolution' "
            "and 'period' are required",
        )

    # Resolve raw strings into their enumeration counterparts.
    station_ids = map(int, read_list(station))
    parameter = parse_enumeration_from_template(parameter, DWDObservationParameterSet)
    resolution = parse_enumeration_from_template(resolution, DWDObservationResolution)
    period = parse_enumeration_from_template(period, DWDObservationPeriod)

    # Data acquisition.
    observations = DWDObservationData(
        station_ids=station_ids,
        parameters=parameter,
        resolution=resolution,
        periods=period,
        tidy_data=True,
        humanize_column_names=True,
    )

    # Postprocessing: optional date filtering, lowercasing, SQL selection.
    df = observations.collect_safe()
    if date is not None:
        df = df.dwd.filter_by_date(date, resolution)
    df = df.dwd.lower()
    if sql is not None:
        df = df.io.sql(sql)

    data = json.loads(df.to_json(orient="records", date_format="iso"))
    return make_json_response(data)