def test_filter_by_sql():
    """Test filter by sql statement"""
    df = ExportMixin(df=df_data).filter_by_sql(
        "SELECT * FROM data WHERE parameter='temperature_air_max_200' AND value < 1.5"
    )
    assert not df.empty

    df = ExportMixin(df=df_data).filter_by_sql(
        "SELECT * FROM data WHERE parameter='temperature_air_max_200' AND value > 1.5"
    )
    assert df.empty
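
# Note: these tests rely on a module-level `df_data` fixture that is not included in this
# excerpt. A minimal sketch of what it plausibly contains, reconstructed from the
# to_dict()/to_csv() assertions further below (a single reading for station 01048); the
# real fixture is defined elsewhere in the test module and may differ:
import pandas as pd

df_data = pd.DataFrame.from_records(
    [
        {
            "station_id": "01048",
            "dataset": "climate_summary",
            "parameter": "temperature_air_max_200",
            "date": pd.Timestamp("2019-12-28", tz="UTC"),
            "value": 1.3,
            "quality": None,
        }
    ]
)
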
def test_export_influxdb2_tidy():
    """Test export of DataFrame to influxdb v2"""
    Settings.tidy = True
    Settings.humanize = True
    Settings.si_units = False

    request = DwdObservationRequest(
        parameter=DwdObservationDataset.CLIMATE_SUMMARY,
        resolution=DwdObservationResolution.DAILY,
        period=DwdObservationPeriod.RECENT,
    ).filter_by_station_id(station_id=[1048])

    mock_client = mock.MagicMock()
    with mock.patch(
        "influxdb_client.InfluxDBClient",
        side_effect=[mock_client],
        create=True,
    ) as mock_connect:

        with mock.patch(
            "influxdb_client.Point",
            create=True,
        ):

            df = request.values.all().df
            ExportMixin(df=df).to_target("influxdb2://orga:token@localhost/?database=dwd&table=weather")

            mock_connect.assert_called_once_with(url="http://localhost:8086", org="orga", token="token")
def test_export_influxdb_tidy():
    """Test export of tidy DataFrame to influxdb v1 (DataFrameClient)"""
    request = DwdObservationRequest(
        parameter=DwdObservationDataset.CLIMATE_SUMMARY,
        resolution=DwdObservationResolution.DAILY,
        period=DwdObservationPeriod.RECENT,
        tidy=True,
        si_units=False,
    ).filter_by_station_id(station_id=[1048])

    mock_client = mock.MagicMock()
    with mock.patch(
        "influxdb.dataframe_client.DataFrameClient",
        side_effect=[mock_client],
        create=True,
    ) as mock_connect:

        df = request.values.all().df
        ExportMixin(df=df).to_target("influxdb://localhost/?database=dwd&table=weather")

        mock_connect.assert_called_once_with(database="dwd")
        mock_client.create_database.assert_called_once_with("dwd")
        mock_client.write_points.assert_called_once()

        mock_client.write_points.assert_called_with(
            dataframe=mock.ANY,
            measurement="weather",
            tag_columns=["station_id", "quality", "dataset", "parameter"],
            batch_size=50000,
        )
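
# Note: unlike the Settings-based examples in this collection, the variant above passes
# tidy/si_units directly to DwdObservationRequest and mocks influxdb's DataFrameClient
# rather than InfluxDBClient, so the write is asserted via write_points(dataframe=...)
# instead of point dictionaries; it apparently stems from an earlier API revision.
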
def test_export_cratedb():
    """Test export of DataFrame to cratedb"""
    Settings.tidy = True
    Settings.humanize = True
    Settings.si_units = False

    request = DwdObservationRequest(
        parameter=DwdObservationDataset.CLIMATE_SUMMARY,
        resolution=DwdObservationResolution.DAILY,
        period=DwdObservationPeriod.RECENT,
    ).filter_by_station_id(
        station_id=[1048],
    )

    with mock.patch(
        "pandas.DataFrame.to_sql",
    ) as mock_to_sql:

        df = request.values.all().df
        ExportMixin(df=df).to_target("crate://localhost/?database=test&table=testdrive")

        mock_to_sql.assert_called_once_with(
            name="testdrive",
            con="crate://localhost",
            schema="test",
            if_exists="replace",
            index=False,
            chunksize=5000,
        )
def test_format_json():
    """Test export of DataFrame to json"""
    output = ExportMixin(df=df_data).to_json()

    response = json.loads(output)
    station_ids = {reading["station_id"] for reading in response}

    assert "01048" in station_ids
def test_format_csv():
    """Test export of DataFrame to csv"""
    output = ExportMixin(df=df_data).to_csv().strip()

    assert "station_id,dataset,parameter,date,value,quality" in output
    assert (
        "01048,climate_summary,temperature_air_max_200,2019-12-28T00-00-00,1.3,"
        in output
    )
def test_to_dict():
    """Test export of DataFrame to dictionary"""
    data = ExportMixin(df=df_data).to_dict()

    assert data == [
        {
            "dataset": "climate_summary",
            "date": "2019-12-28T00:00:00+00:00",
            "parameter": "temperature_air_max_200",
            "quality": None,
            "station_id": "01048",
            "value": 1.3,
        },
    ]
def test_export_influxdb1_tidy():
    """Test export of DataFrame to influxdb v1"""
    Settings.tidy = True
    Settings.humanize = True
    Settings.si_units = False

    request = DwdObservationRequest(
        parameter=DwdObservationDataset.CLIMATE_SUMMARY,
        resolution=DwdObservationResolution.DAILY,
        period=DwdObservationPeriod.RECENT,
    ).filter_by_station_id(station_id=[1048])

    mock_client = mock.MagicMock()
    with mock.patch(
        "influxdb.InfluxDBClient",
        side_effect=[mock_client],
        create=True,
    ) as mock_connect:

        df = request.values.all().df
        ExportMixin(df=df).to_target("influxdb://localhost/?database=dwd&table=weather")

        mock_connect.assert_called_once_with(
            host="localhost",
            port=8086,
            username=None,
            password=None,
            database="dwd",
            ssl=False,
        )
        mock_client.create_database.assert_called_once_with("dwd")
        mock_client.write_points.assert_called_once()

        mock_client.write_points.assert_called_with(
            points=mock.ANY,
            batch_size=50000,
        )

        points = mock_client.write_points.call_args.kwargs["points"]
        assert points[0]["measurement"] == "weather"
        assert list(points[0]["tags"].keys()) == [
            "station_id",
            "quality",
            "dataset",
            "parameter",
        ]
        assert list(points[0]["fields"].keys()) == [
            "value",
        ]
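
# For orientation: a single point handed to write_points() in tidy mode would look roughly
# like the sketch below (derived from the assertions in test_export_influxdb1_tidy above;
# the concrete time, quality and value entries are illustrative, not asserted):
#
# {
#     "measurement": "weather",
#     "time": "2019-12-28T00:00:00+00:00",
#     "tags": {
#         "station_id": "01048",
#         "quality": ...,
#         "dataset": "climate_summary",
#         "parameter": "temperature_air_max_200",
#     },
#     "fields": {"value": ...},
# }
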
def test_export_unknown():
    """Test export of DataFrame to unknown format"""
    Settings.tidy = True
    Settings.humanize = True
    Settings.si_units = True

    request = DwdObservationRequest(
        parameter=DwdObservationDataset.CLIMATE_SUMMARY,
        resolution=DwdObservationResolution.DAILY,
        period=DwdObservationPeriod.RECENT,
    ).filter_by_station_id(
        station_id=[1048],
    )

    df = request.values.all().df

    with pytest.raises(KeyError) as ex:
        ExportMixin(df=df).to_target("file:///test.foobar")

    ex.match("Unknown export file type")
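
# Note: taken together with the file-based examples in this collection (.xlsx, .feather,
# .zarr), the exporter evidently dispatches file:// targets on the filename suffix, and an
# unrecognised suffix such as ".foobar" is expected to raise the KeyError matched above.
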
def test_export_duckdb():
    """Test export of DataFrame to duckdb"""
    Settings.tidy = True
    Settings.humanize = True
    Settings.si_units = False

    request = DwdObservationRequest(
        parameter=DwdObservationDataset.CLIMATE_SUMMARY,
        resolution=DwdObservationResolution.DAILY,
        period=DwdObservationPeriod.RECENT,
    ).filter_by_station_id(station_id=[1048])

    mock_connection = mock.MagicMock()
    with mock.patch("duckdb.connect", side_effect=[mock_connection], create=True) as mock_connect:

        df = request.values.all().df
        ExportMixin(df=df).to_target("duckdb:///test.duckdb?table=testdrive")

        mock_connect.assert_called_once_with(database="test.duckdb", read_only=False)
        mock_connection.register.assert_called_once()
        mock_connection.execute.assert_called()
        mock_connection.table.assert_called_once_with("testdrive")
        mock_connection.close.assert_called_once()
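
# A non-mocked round trip could be checked roughly like this (a sketch, not part of the
# original test; it assumes the duckdb package is installed and that the export above ran
# against a real "test.duckdb" file rather than the mocked connection):
import duckdb

connection = duckdb.connect(database="test.duckdb", read_only=True)
row_count = connection.execute("SELECT COUNT(*) FROM testdrive").fetchone()[0]
connection.close()
assert row_count > 0
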
def test_export_zarr(tmpdir_factory):
    """Test export of DataFrame to zarr"""
    Settings.tidy = False
    Settings.humanize = True
    Settings.si_units = False

    # Request data.
    request = DwdObservationRequest(
        parameter=DwdObservationDataset.CLIMATE_SUMMARY,
        resolution=DwdObservationResolution.DAILY,
        start_date="2019",
        end_date="2020",
    ).filter_by_station_id(
        station_id=[1048],
    )

    df = request.values.all().df

    # Save to Zarr group.
    filename = tmpdir_factory.mktemp("data").join("observations.zarr")
    ExportMixin(df=df).to_target(f"file://{filename}")

    # Read back Zarr group.
    group = zarr.open(str(filename), mode="r")

    # Validate dimensions.
    assert len(group) == 20
    assert len(group.index) == 366

    # Validate column names.
    assert set(group.keys()) == {
        "index",
        "station_id",
        "dataset",
        "date",
        "qn_3",
        "wind_gust_max",
        "wind_speed",
        "qn_4",
        "precipitation_height",
        "precipitation_form",
        "sunshine_duration",
        "snow_depth",
        "cloud_cover_total",
        "pressure_vapor",
        "pressure_air_site",
        "temperature_air_mean_200",
        "humidity",
        "temperature_air_max_200",
        "temperature_air_min_200",
        "temperature_air_min_005",
    }

    # Validate content.
    data = group

    assert data["date"][0] == np.datetime64(datetime.datetime(2019, 1, 1, 0, 0, tzinfo=datetime.timezone.utc))
    assert data["temperature_air_min_005"][0] == 1.5
    assert data["date"][-1] == np.datetime64(datetime.datetime(2020, 1, 1, 0, 0, tzinfo=datetime.timezone.utc))
    assert data["temperature_air_min_005"][-1] == -4.6

    shutil.rmtree(filename)
def test_export_influxdb1_tabular():
    """Test export of DataFrame to influxdb v1"""
    Settings.tidy = False
    Settings.humanize = True
    Settings.si_units = False

    request = DwdObservationRequest(
        parameter=DwdObservationDataset.CLIMATE_SUMMARY,
        resolution=DwdObservationResolution.DAILY,
        period=DwdObservationPeriod.RECENT,
    ).filter_by_station_id(station_id=[1048])

    mock_client = mock.MagicMock()
    with mock.patch(
        "influxdb.InfluxDBClient",
        side_effect=[mock_client],
        create=True,
    ) as mock_connect:

        df = request.values.all().df
        ExportMixin(df=df).to_target("influxdb://localhost/?database=dwd&table=weather")

        mock_connect.assert_called_once_with(
            host="localhost",
            port=8086,
            username=None,
            password=None,
            database="dwd",
            ssl=False,
        )
        mock_client.create_database.assert_called_once_with("dwd")
        mock_client.write_points.assert_called_once()

        mock_client.write_points.assert_called_with(
            points=mock.ANY,
            batch_size=50000,
        )

        points = mock_client.write_points.call_args.kwargs["points"]
        assert points[0]["measurement"] == "weather"
        assert list(points[0]["tags"].keys()) == [
            "station_id",
            "qn_3",
            "qn_4",
            "dataset",
        ]
        assert list(points[0]["fields"].keys()) == [
            "wind_gust_max",
            "wind_speed",
            "precipitation_height",
            "precipitation_form",
            "sunshine_duration",
            "snow_depth",
            "cloud_cover_total",
            "pressure_vapor",
            "pressure_air_site",
            "temperature_air_mean_200",
            "humidity",
            "temperature_air_max_200",
            "temperature_air_min_200",
            "temperature_air_min_005",
        ]
def test_export_sqlite(tmpdir_factory):
    """Test export of DataFrame to sqlite db"""
    Settings.tidy = False
    Settings.humanize = True
    Settings.si_units = False

    request = DwdObservationRequest(
        parameter=DwdObservationDataset.CLIMATE_SUMMARY,
        resolution=DwdObservationResolution.DAILY,
        start_date="2019",
        end_date="2020",
    ).filter_by_station_id(
        station_id=[1048],
    )

    filename = tmpdir_factory.mktemp("data").join("observations.sqlite")

    df = request.values.all().df
    ExportMixin(df=df).to_target(f"sqlite:///{filename}?table=testdrive")

    connection = sqlite3.connect(filename)
    cursor = connection.cursor()
    cursor.execute("SELECT * FROM testdrive")
    results = cursor.fetchall()
    cursor.close()
    connection.close()

    assert results[0] == (
        "01048",
        "climate_summary",
        "2019-01-01 00:00:00.000000",
        10.0,
        19.9,
        8.5,
        10.0,
        0.9,
        8.0,
        0.0,
        0.0,
        7.4,
        7.9,
        991.9,
        5.9,
        84.0,
        7.5,
        2.0,
        1.5,
    )

    assert results[-1] == (
        "01048",
        "climate_summary",
        "2020-01-01 00:00:00.000000",
        10,
        6.9,
        3.2,
        3,
        0.0,
        0,
        3.933,
        0,
        4.2,
        5.7,
        1005.11,
        2.4,
        79.0,
        5.6,
        -2.8,
        -4.6,
    )
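
# For readability: the positional values in the result tuples above correspond to the
# column order shown in the feather and spreadsheet examples below, i.e. station_id,
# dataset, date, qn_3, wind_gust_max, wind_speed, qn_4, precipitation_height,
# precipitation_form, sunshine_duration, snow_depth, cloud_cover_total, pressure_vapor,
# pressure_air_site, temperature_air_mean_200, humidity, temperature_air_max_200,
# temperature_air_min_200, temperature_air_min_005 (an inference from the matching
# first/last values, not asserted by this test).
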
def test_export_feather(tmpdir_factory):
    """Test export of DataFrame to feather"""
    Settings.tidy = False
    Settings.humanize = True
    Settings.si_units = False

    # Request data
    request = DwdObservationRequest(
        parameter=DwdObservationDataset.CLIMATE_SUMMARY,
        resolution=DwdObservationResolution.DAILY,
        start_date="2019",
        end_date="2020",
    ).filter_by_station_id(
        station_id=[1048],
    )

    df = request.values.all().df

    # Save to Feather file.
    filename = tmpdir_factory.mktemp("data").join("observations.feather")
    ExportMixin(df=df).to_target(f"file://{filename}")

    # Read back Feather file.
    table = feather.read_table(filename)

    # Validate dimensions.
    assert table.num_columns == 19
    assert table.num_rows == 366

    # Validate column names.
    assert table.column_names == [
        "station_id",
        "dataset",
        "date",
        "qn_3",
        "wind_gust_max",
        "wind_speed",
        "qn_4",
        "precipitation_height",
        "precipitation_form",
        "sunshine_duration",
        "snow_depth",
        "cloud_cover_total",
        "pressure_vapor",
        "pressure_air_site",
        "temperature_air_mean_200",
        "humidity",
        "temperature_air_max_200",
        "temperature_air_min_200",
        "temperature_air_min_005",
    ]

    # Validate content.
    data = table.to_pydict()

    assert data["date"][0] == datetime.datetime(2019, 1, 1, 0, 0, tzinfo=datetime.timezone.utc)
    assert data["temperature_air_min_005"][0] == 1.5
    assert data["date"][-1] == datetime.datetime(2020, 1, 1, 0, 0, tzinfo=datetime.timezone.utc)
    assert data["temperature_air_min_005"][-1] == -4.6

    os.unlink(filename)
def test_export_spreadsheet(tmpdir_factory):
    """Test export of DataFrame to spreadsheet"""
    Settings.tidy = False
    Settings.humanize = True
    Settings.si_units = False

    # 1. Request data and save to .xlsx file.
    request = DwdObservationRequest(
        parameter=DwdObservationDataset.CLIMATE_SUMMARY,
        resolution=DwdObservationResolution.DAILY,
        start_date="2019",
        end_date="2020",
    ).filter_by_station_id(
        station_id=[1048],
    )

    df = request.values.all().df

    filename = tmpdir_factory.mktemp("data").join("observations.xlsx")
    ExportMixin(df=df).to_target(f"file://{filename}")

    workbook = openpyxl.load_workbook(filename=filename)
    worksheet = workbook.active

    # 2. Validate some details of .xlsx file.

    # Validate header row.
    header = list(worksheet.iter_cols(min_row=1, max_row=1, values_only=True))
    assert header == [
        ("station_id",),
        ("dataset",),
        ("date",),
        ("qn_3",),
        ("wind_gust_max",),
        ("wind_speed",),
        ("qn_4",),
        ("precipitation_height",),
        ("precipitation_form",),
        ("sunshine_duration",),
        ("snow_depth",),
        ("cloud_cover_total",),
        ("pressure_vapor",),
        ("pressure_air_site",),
        ("temperature_air_mean_200",),
        ("humidity",),
        ("temperature_air_max_200",),
        ("temperature_air_min_200",),
        ("temperature_air_min_005",),
    ]

    # Validate number of records.
    assert worksheet.max_row == 367

    first_record = list(worksheet.iter_cols(min_row=2, max_row=2, values_only=True))
    assert first_record == [
        ("01048",),
        ("climate_summary",),
        ("2019-01-01T00:00:00+00:00",),
        (10,),
        (19.9,),
        (8.5,),
        (10,),
        (0.9,),
        (8,),
        (0,),
        (0,),
        (7.4,),
        (7.9,),
        (991.9,),
        (5.9,),
        (84,),
        (7.5,),
        (2,),
        (1.5,),
    ]

    last_record = list(worksheet.iter_cols(min_row=worksheet.max_row, max_row=worksheet.max_row, values_only=True))
    assert last_record == [
        ("01048",),
        ("climate_summary",),
        ("2020-01-01T00:00:00+00:00",),
        (10,),
        (6.9,),
        (3.2,),
        (3,),
        (0,),
        (0,),
        (3.933,),
        (0,),
        (4.2,),
        (5.7,),
        (1005.11,),
        (2.4,),
        (79,),
        (5.6,),
        (-2.8,),
        (-4.6,),
    ]

    os.unlink(filename)