Ejemplo n.º 1
0
def test_dwd_observations_stations_geojson():
    """Check the OGC feature collection export for station "00001"."""
    # Dataset / resolution / period combination that exists.
    stations = DwdObservationRequest(
        DwdObservationDataset.CLIMATE_SUMMARY,
        DwdObservationResolution.DAILY,
        DwdObservationPeriod.HISTORICAL,
    ).filter_by_station_id(station_id=("00001", ))

    assert not stations.df.empty

    feature_collection = stations.to_ogc_feature_collection()

    # Only the first feature is inspected.
    first_feature = feature_collection["features"][0]
    feature_properties = first_feature["properties"]
    feature_geometry = first_feature["geometry"]

    assert feature_properties["name"] == "Aach"
    assert feature_properties["state"] == "Baden-Württemberg"

    expected_geometry = {
        "type": "Point",
        "coordinates": [8.8493, 47.8413, 478.0],
    }
    assert feature_geometry == expected_geometry
Ejemplo n.º 2
0
def test_dwd_observations_stations_filter_name_empty():
    """Filtering stations by the name "FizzBuzz" must give an empty frame."""
    # Dataset / resolution / period combination that exists.
    stations = DwdObservationRequest(
        DwdObservationDataset.CLIMATE_SUMMARY,
        DwdObservationResolution.DAILY,
        DwdObservationPeriod.HISTORICAL,
    )

    matches = stations.filter_by_name(name="FizzBuzz")

    assert matches.df.empty
Ejemplo n.º 3
0
def test_dwd_observations_stations_filter_name():
    """Filtering stations by the name "Aach" must reproduce EXPECTED_DF."""
    # Dataset / resolution / period combination that exists.
    stations = DwdObservationRequest(
        DwdObservationDataset.CLIMATE_SUMMARY,
        DwdObservationResolution.DAILY,
        DwdObservationPeriod.HISTORICAL,
    )

    matches = stations.filter_by_name(name="Aach")
    station_frame = matches.df

    assert not station_frame.empty

    assert_frame_equal(station_frame, EXPECTED_DF)
Ejemplo n.º 4
0
def about(options: Munch):
    """
    Output possible arguments for command line options
    "--parameter", "--resolution" and "--period", or print coverage
    and field metadata, depending on the selected subcommand.

    When no known subcommand is selected, an error is logged, the list of
    available subcommands is printed and the process exits with status 1.

    :param options: Normalized docopt command line options.
    """

    def print_items(items):
        """Print every truthy item as a bullet, preferring its ``.value``."""
        for item in items:
            if item:
                # Enum members expose ``.value``; plain strings are printed as-is.
                print("-", getattr(item, "value", item))

    if options.parameters:
        print_items(DwdObservationDataset)

    elif options.resolutions:
        print_items(DwdObservationResolution)

    elif options.periods:
        print_items(DwdObservationPeriod)

    elif options.coverage:
        metadata = DwdObservationRequest.discover(
            filter_=options.resolution,
            dataset=read_list(options.parameter),
            flatten=False,
        )
        # Use a dedicated name so the printing helper is never shadowed.
        print(json.dumps(metadata, indent=4))

    elif options.fields:
        metadata = DwdObservationRequest.describe_fields(
            dataset=read_list(options.parameter),
            resolution=options.resolution,
            period=read_list(options.period),
            language=options.language,
        )
        print(pformat(dict(metadata)))

    else:
        log.error(
            'Please invoke "wetterdienst dwd about" with one of these subcommands:'
        )
        # "fields" is handled above, so it belongs in the help listing as well.
        print_items(["parameters", "resolutions", "periods", "coverage", "fields"])
        sys.exit(1)
Ejemplo n.º 5
0
def test_dwd_observation_data_result_missing_data():
    """Values must be missing for dates on which the station should not
    have data."""
    Settings.tidy = True
    Settings.humanize = True
    Settings.si_units = True

    daily_stations = DwdObservationRequest(
        parameter=[DwdObservationDataset.CLIMATE_SUMMARY],
        resolution=DwdObservationResolution.DAILY,
        start_date="1933-12-27",  # few days before official start
        end_date="1934-01-04",  # few days after official start
    ).filter_by_station_id(station_id=[1048], )

    # Drop "quality" so that "value" is the only column that may hold NaN.
    daily_values = daily_stations.values.all().df.drop("quality", axis=1)

    observation_year = daily_values["date"].dt.year
    values_1933 = daily_values[observation_year == 1933]
    values_1934 = daily_values[observation_year == 1934]

    # 1933: rows exist, but none of them carries a value.
    assert not values_1933.empty
    assert values_1933.dropna().empty
    # 1934: rows exist and at least one of them carries a value.
    assert not values_1934.empty
    assert not values_1934.dropna().empty

    hourly_stations = DwdObservationRequest(
        parameter=DwdObservationParameter.HOURLY.TEMPERATURE_AIR_MEAN_200,
        resolution=DwdObservationResolution.HOURLY,
        # no data at this time (reason unknown)
        start_date="2020-06-09 12:00:00",
        end_date="2020-06-09 12:00:00",
    ).filter_by_station_id(station_id=["03348"], )

    hourly_values = hourly_stations.values.all().df

    missing_float = pd.Series([pd.NA], dtype=pd.Float64Dtype()).astype(float)
    expected_values = pd.DataFrame({
        "station_id": pd.Categorical(["03348"]),
        "dataset": pd.Categorical(["temperature_air"]),
        "parameter": pd.Categorical(["temperature_air_mean_200"]),
        "date": [datetime(2020, 6, 9, 12, 0, 0, tzinfo=pytz.UTC)],
        "value": missing_float,
        "quality": missing_float.copy(),
    })

    assert_frame_equal(hourly_values, expected_values, check_categorical=False)
Ejemplo n.º 6
0
def test_dwd_observations_stations_fail():
    """Wrong filter arguments must raise a TypeError."""

    def build_request():
        # Built inside each ``raises`` context, exactly where the original
        # constructed its request.
        return DwdObservationRequest(
            DwdObservationDataset.CLIMATE_SUMMARY,
            DwdObservationResolution.DAILY,
            DwdObservationPeriod.HISTORICAL,
        )

    # ``name`` is passed to the station id filter.
    with pytest.raises(TypeError):
        build_request().filter_by_station_id(name="FizzBuzz")

    # An integer is passed as the station name.
    with pytest.raises(TypeError):
        build_request().filter_by_name(name=123)
Ejemplo n.º 7
0
def test_dwd_observation_data_fails():
    """Cover the station id "test" and a start date later than the end date."""
    # Station id "test" is expected to produce an empty station frame.
    stations = DwdObservationRequest(
        parameter=[DwdObservationDataset.CLIMATE_SUMMARY],
        period=[DwdObservationPeriod.HISTORICAL],
        resolution=DwdObservationResolution.DAILY,
    ).filter_by_station_id(station_id=["test"], )
    assert stations.df.empty

    # start_date lies after end_date.
    with pytest.raises(StartDateEndDateError):
        DwdObservationRequest(
            parameter=["abc"],
            resolution=DwdObservationResolution.DAILY,
            start_date="1971-01-01",
            end_date="1951-01-01",
        )
Ejemplo n.º 8
0
def test_create_humanized_column_names_mapping():
    """The humanized parameter mapping must contain the daily "kl" names."""
    expected_pairs = {
        "fx": "wind_gust_max",
        "fm": "wind_speed",
        "rsk": "precipitation_height",
        "rskf": "precipitation_form",
        "sdk": "sunshine_duration",
        "shk_tag": "snow_depth",
        "nm": "cloud_cover_total",
        "vpm": "pressure_vapor",
        "pm": "pressure_air_site",
        "tmk": "temperature_air_mean_200",
        "upm": "humidity",
        "txk": "temperature_air_max_200",
        "tnk": "temperature_air_min_200",
        "tgk": "temperature_air_min_005",
    }

    stations = DwdObservationRequest(
        [DwdObservationDataset.CLIMATE_SUMMARY],
        DwdObservationResolution.DAILY,
        [DwdObservationPeriod.RECENT],
    ).filter_by_station_id((0, ), )
    actual_mapping = stations.values._create_humanized_parameters_mapping()

    # Every expected (key, value) pair has to appear in the actual mapping.
    assert set(expected_pairs.items()) <= set(actual_mapping.items())
Ejemplo n.º 9
0
def test_dwd_observation_weather_phenomena():
    """Hourly weather phenomena data for March 2022 must contain values.

    Sample provided by saschnet (https://github.com/saschnet), see also
    https://github.com/earthobservations/wetterdienst/issues/647
    """
    Settings.tidy = True
    Settings.humanize = False
    Settings.si_units = False

    march_start = datetime(year=2022, month=3, day=1, tzinfo=timezone.utc)
    march_end = datetime(year=2022, month=3, day=31, tzinfo=timezone.utc)

    stations = DwdObservationRequest(
        resolution=DwdObservationResolution.HOURLY,
        parameter=[DwdObservationParameter.HOURLY.WEATHER_PHENOMENA.WEATHER],
        start_date=march_start,
        end_date=march_end,
    )

    non_missing = stations.all().df.dropna()
    assert len(non_missing) >= 1
Ejemplo n.º 10
0
def test_request_period_recent_now():
    """A start date two hours in the past must give the RECENT and NOW periods."""
    two_hours_ago = pd.Timestamp(datetime.utcnow()) - pd.Timedelta(hours=2)

    stations = DwdObservationRequest(
        parameter=[DwdObservationDataset.CLIMATE_SUMMARY],
        resolution=DwdObservationResolution.DAILY,
        start_date=two_hours_ago,
    )

    expected_periods = [Period.RECENT, Period.NOW]
    assert stations.period == expected_periods
Ejemplo n.º 11
0
def test_request_period_now_fixeddate():
    """A start date two hours in the past must include the NOW period."""
    two_hours_ago = pd.Timestamp(datetime.utcnow()) - pd.Timedelta(hours=2)

    # Now period
    stations = DwdObservationRequest(
        parameter=[DwdObservationDataset.CLIMATE_SUMMARY],
        resolution=DwdObservationResolution.DAILY,
        start_date=two_hours_ago,
    )

    assert Period.NOW in stations.period
Ejemplo n.º 12
0
def test_request_period_empty():
    """A start date 720 days in the future must give no period at all."""
    far_future = pd.Timestamp(datetime.utcnow()) + pd.Timedelta(days=720)

    # No period (for example in future)
    stations = DwdObservationRequest(
        parameter=[DwdObservationDataset.CLIMATE_SUMMARY],
        resolution=DwdObservationResolution.DAILY,
        start_date=far_future,
    )

    assert stations.period == []
Ejemplo n.º 13
0
def test_dwd_observation_data_10_minutes_result_tidy():
    """Compare tidy 10-minute "pp_10" values of station 1048 with known data."""
    Settings.tidy = True
    Settings.humanize = False
    Settings.si_units = False

    stations = DwdObservationRequest(
        parameter=[
            DwdObservationParameter.MINUTE_10.TEMPERATURE_AIR.PRESSURE_AIR_SITE
        ],
        resolution=DwdObservationResolution.MINUTE_10,
        start_date="1999-12-31 22:00",
        end_date="1999-12-31 23:00",
    ).filter_by_station_id(station_id=(1048, ), )

    actual = stations.values.all().df

    # 22:00 through 22:50 in ten minute steps, followed by 23:00.
    expected_dates = [
        datetime(1999, 12, 31, 22, minute, tzinfo=pytz.UTC)
        for minute in range(0, 60, 10)
    ]
    expected_dates.append(datetime(1999, 12, 31, 23, 0, tzinfo=pytz.UTC))

    # The final timestamp (23:00) is expected to have neither value nor quality.
    expected_values = pd.to_numeric(
        [996.1, 996.2, 996.2, 996.2, 996.3, 996.4, pd.NA],
        errors="coerce",
    ).astype(float)
    expected_quality = pd.to_numeric(
        [1, 1, 1, 1, 1, 1, pd.NA],
        errors="coerce",
    ).astype(float)

    expected = pd.DataFrame(
        {
            "station_id": pd.Categorical(["01048"] * 7),
            "dataset": pd.Categorical(["temperature_air"] * 7),
            "parameter": pd.Categorical(["pp_10"] * 7),
            "date": expected_dates,
            "value": expected_values,
            "quality": expected_quality,
        },
    )

    # check_categorical=False: needed since pandas 1.2?
    assert_frame_equal(actual, expected, check_categorical=False)
Ejemplo n.º 14
0
def test_dwd_observation_data_api():
    """String arguments must give the same request as their enum counterparts."""
    from_strings = DwdObservationRequest(
        parameter=["precipitation_height"],
        resolution="daily",
        period=["recent", "historical"],
    )

    from_enums = DwdObservationRequest(
        parameter=[DwdObservationParameter.DAILY.PRECIPITATION_HEIGHT],
        resolution=Resolution.DAILY,
        period=[Period.HISTORICAL, Period.RECENT],
        start_date=None,
        end_date=None,
    )
    assert from_strings == from_enums

    # The bare parameter name is paired with the climate summary dataset.
    assert from_strings.parameter == [(
        DwdObservationParameter.DAILY.CLIMATE_SUMMARY.PRECIPITATION_HEIGHT,
        DwdObservationDataset.CLIMATE_SUMMARY,
    )]
Ejemplo n.º 15
0
def test_dwd_observation_data_dates():
    """A single given date must be mirrored into the missing start/end date."""

    def expected_stations():
        # Explicit request with both dates set to 1971-01-01.
        return DwdObservationRequest(
            parameter=[DwdObservationDataset.CLIMATE_SUMMARY],
            resolution=DwdObservationResolution.DAILY,
            period=[
                DwdObservationPeriod.HISTORICAL,
            ],
            start_date=datetime(1971, 1, 1),
            end_date=datetime(1971, 1, 1),
        ).filter_by_station_id(station_id=[1], )

    # Only start_date given.
    start_only = DwdObservationRequest(
        parameter=[DwdObservationDataset.CLIMATE_SUMMARY],
        resolution=DwdObservationResolution.DAILY,
        start_date="1971-01-01",
    ).filter_by_station_id(station_id=[1], )

    assert start_only == expected_stations()

    # Only end_date given (together with an explicit period).
    end_only = DwdObservationRequest(
        parameter=[DwdObservationDataset.CLIMATE_SUMMARY],
        resolution=DwdObservationResolution.DAILY,
        period=[DwdObservationPeriod.HISTORICAL],
        end_date="1971-01-01",
    ).filter_by_station_id(station_id=[1], )

    assert end_only == expected_stations()

    # start_date after end_date is rejected.
    with pytest.raises(StartDateEndDateError):
        DwdObservationRequest(
            parameter=[DwdObservationDataset.CLIMATE_SUMMARY],
            resolution=DwdObservationResolution.DAILY,
            start_date="1971-01-01",
            end_date="1951-01-01",
        )
Ejemplo n.º 16
0
def test_dwd_observation_data_parameter():
    """String arguments must give the same stations as their enum counterparts."""
    from_strings = DwdObservationRequest(
        parameter=["precipitation_height"],
        resolution="daily",
        period=["recent", "historical"],
    ).filter_by_station_id(station_id=[1], )

    from_enums = DwdObservationRequest(
        parameter=[DwdObservationParameter.DAILY.PRECIPITATION_HEIGHT],
        resolution=Resolution.DAILY,
        period=[Period.HISTORICAL, Period.RECENT],
        start_date=None,
        end_date=None,
    ).filter_by_station_id(station_id=[1], )
    assert from_strings == from_enums

    # The bare parameter name is paired with the "precipitation more" dataset.
    assert from_strings.parameter == [(
        DwdObservationDatasetTree.DAILY.PRECIPITATION_MORE.PRECIPITATION_HEIGHT,  # noqa: E501, B950
        DwdObservationDataset.PRECIPITATION_MORE,
    )]
Ejemplo n.º 17
0
def test_request_period_historical():
    """A request starting on 1971-01-01 must give only the HISTORICAL period."""
    stations = DwdObservationRequest(
        parameter=[DwdObservationDataset.CLIMATE_SUMMARY],
        resolution=DwdObservationResolution.DAILY,
        start_date="1971-01-01",
    )

    expected_periods = [Period.HISTORICAL]
    assert stations.period == expected_periods
Ejemplo n.º 18
0
def test_dwd_observation_data_parameter_dataset_pairs():
    """Parameters passed as (parameter, dataset) pairs are resolved to enums."""
    # Pair naming the dataset on both sides.
    dataset_pair_request = DwdObservationRequest(
        parameter=[("climate_summary", "climate_summary")],
        resolution="daily",
        period=["recent", "historical"],
    )

    assert dataset_pair_request.parameter == [(
        DwdObservationDataset.CLIMATE_SUMMARY,
        DwdObservationDataset.CLIMATE_SUMMARY,
    )]

    # Pair naming a single parameter and its dataset.
    parameter_pair_request = DwdObservationRequest(
        parameter=[("precipitation_height", "precipitation_more")],
        resolution="daily",
        period=["recent", "historical"],
    )

    assert parameter_pair_request.parameter == [(
        DwdObservationParameter.DAILY.PRECIPITATION_MORE.PRECIPITATION_HEIGHT,
        DwdObservationDataset.PRECIPITATION_MORE,
    )]
Ejemplo n.º 19
0
def test_dwd_observation_data_parameter():
    """Parameters passed as single names (no dataset) are resolved to pairs."""
    # A bare parameter name.
    parameter_request = DwdObservationRequest(
        parameter=["precipitation_height"],
        resolution="daily",
        period=["recent", "historical"],
    )

    assert parameter_request.parameter == [(
        DwdObservationParameter.DAILY.CLIMATE_SUMMARY.PRECIPITATION_HEIGHT,
        DwdObservationDataset.CLIMATE_SUMMARY,
    )]

    # A bare dataset name.
    dataset_request = DwdObservationRequest(
        parameter=["climate_summary"],
        resolution="daily",
        period=["recent", "historical"],
    )

    assert dataset_request.parameter == [(
        DwdObservationDataset.CLIMATE_SUMMARY,
        DwdObservationDataset.CLIMATE_SUMMARY,
    )]
Ejemplo n.º 20
0
def test_request_period_historical_recent():
    """A range from 1971 to 400 days ago must give HISTORICAL and RECENT."""
    four_hundred_days_ago = pd.Timestamp(
        datetime.utcnow()) - pd.Timedelta(days=400)

    stations = DwdObservationRequest(
        parameter=[DwdObservationDataset.CLIMATE_SUMMARY],
        resolution=DwdObservationResolution.DAILY,
        start_date="1971-01-01",
        end_date=four_hundred_days_ago,
    )

    expected_periods = [Period.HISTORICAL, Period.RECENT]
    assert stations.period == expected_periods
Ejemplo n.º 21
0
def test_dwd_observation_data_dataset():
    """Requesting a whole dataset gives equal results for equivalent inputs."""

    def explicit_stations():
        # Fully spelled out request: enums plus explicit ``None`` dates.
        return DwdObservationRequest(
            parameter=[DwdObservationDataset.CLIMATE_SUMMARY],
            resolution=DwdObservationResolution.DAILY,
            period=[
                DwdObservationPeriod.HISTORICAL, DwdObservationPeriod.RECENT
            ],
            start_date=None,
            end_date=None,
        ).filter_by_station_id(station_id=(1, ), )

    # Dataset given by its short name "kl" and plain strings.
    from_strings = DwdObservationRequest(
        parameter=["kl"],
        resolution="daily",
        period=["recent", "historical"],
    ).filter_by_station_id(station_id=(1, ))

    assert explicit_stations() == from_strings

    # Dataset given by enums, dates left at their defaults.
    from_enums = DwdObservationRequest(
        parameter=[DwdObservationDataset.CLIMATE_SUMMARY],
        resolution=DwdObservationResolution.DAILY,
        period=[DwdObservationPeriod.HISTORICAL, DwdObservationPeriod.RECENT],
    ).filter_by_station_id(station_id=(1, ), )

    assert from_enums == explicit_stations()

    assert from_enums.parameter == [(
        DwdObservationDataset.CLIMATE_SUMMARY,
        DwdObservationDataset.CLIMATE_SUMMARY,
    )]
Ejemplo n.º 22
0
def test_dwd_observation_data_result_all_missing_data():
    """All values of station "01851" must be missing for 2021-10-01 to 2021-10-05."""
    Settings.tidy = True
    Settings.humanize = True
    Settings.si_units = True

    request = DwdObservationRequest(
        parameter=Parameter.PRECIPITATION_HEIGHT.name,
        resolution=DwdObservationResolution.MINUTE_10,
        start_date=datetime(2021, 10, 1),
        end_date=datetime(2021, 10, 5),
    )
    stations = request.filter_by_station_id(["01851"])

    frame = stations.values.all().df

    is_missing = frame.value.isna()
    assert all(is_missing)
Ejemplo n.º 23
0
def test_dwd_observation_data_monthly_tidy():
    """Compare tidy monthly precipitation of station "00433" in 2020 with
    known values."""
    Settings.tidy = True
    Settings.humanize = True
    Settings.si_units = True

    stations = DwdObservationRequest(
        parameter=[DwdObservationParameter.MONTHLY.PRECIPITATION_HEIGHT],
        resolution=DwdObservationResolution.MONTHLY,
        start_date="2020-01-01",
        end_date="2020-12-31",
    ).filter_by_station_id("00433")

    actual = stations.values.all().df

    # One timestamp per month of 2020.
    month_starts = [
        Timestamp("2020-01-01 00:00:00+0000", tz="UTC"),
        Timestamp("2020-02-01 00:00:00+0000", tz="UTC"),
        Timestamp("2020-03-01 00:00:00+0000", tz="UTC"),
        Timestamp("2020-04-01 00:00:00+0000", tz="UTC"),
        Timestamp("2020-05-01 00:00:00+0000", tz="UTC"),
        Timestamp("2020-06-01 00:00:00+0000", tz="UTC"),
        Timestamp("2020-07-01 00:00:00+0000", tz="UTC"),
        Timestamp("2020-08-01 00:00:00+0000", tz="UTC"),
        Timestamp("2020-09-01 00:00:00+0000", tz="UTC"),
        Timestamp("2020-10-01 00:00:00+0000", tz="UTC"),
        Timestamp("2020-11-01 00:00:00+0000", tz="UTC"),
        Timestamp("2020-12-01 00:00:00+0000", tz="UTC"),
    ]
    monthly_heights = [
        34.0, 83.2, 30.3, 22.7, 33.3, 35.8,
        46.8, 43.2, 52.8, 58.2, 16.4, 22.1,
    ]

    expected = pd.DataFrame(
        {
            "station_id": pd.Categorical(["00433"] * 12),
            "dataset": pd.Categorical(["climate_summary"] * 12),
            "parameter": pd.Categorical(["precipitation_height"] * 12),
            "date": month_starts,
            "value": pd.to_numeric(monthly_heights, errors="coerce"),
            "quality": pd.to_numeric([9.0] * 12, errors="coerce"),
        },
    )

    assert_frame_equal(actual, expected, check_categorical=False)
Ejemplo n.º 24
0
def dwd_stations(
    kind: str,
    parameter: str = Query(default=None),
    resolution: str = Query(default=None),
    period: str = Query(default=None),
    mosmix_type: str = Query(default=None),
    longitude: float = Query(default=None),
    latitude: float = Query(default=None),
    rank: int = Query(default=None),
    distance: int = Query(default=None),
    sql: str = Query(default=None),
):
    """
    Acquire a station listing for DWD observation or forecast products.

    :param kind: Product kind, either "observation" or "forecast".
    :param parameter: Passed to the request class as ``parameter``.
    :param resolution: Passed to ``DwdObservationRequest`` as ``resolution``.
    :param period: Passed to ``DwdObservationRequest`` as ``period``.
    :param mosmix_type: Passed to ``DwdMosmixRequest`` as ``mosmix_type``.
    :param longitude: Longitude used for rank / distance filtering.
    :param latitude: Latitude used for rank / distance filtering.
    :param rank: When set together with a location, filter by rank.
    :param distance: When set together with a location (and no rank),
        filter by distance; the unit passed along is "km".
    :param sql: Optional SQL expression handed to ``filter_by_sql``.
    :raises HTTPException: 404 when ``kind`` is unknown; 400 when an
        observation request lacks ``parameter``, ``resolution`` or ``period``.
    :return: JSON response built from ``results.to_dict()``.
    """
    if kind not in ["observation", "forecast"]:
        # Must be raised, not returned, for the 404 to reach the client,
        # matching the 400 handling below.
        raise HTTPException(status_code=404, detail=f"product {kind} not found")

    # Data acquisition.
    if kind == "observation":
        if parameter is None or resolution is None or period is None:
            raise HTTPException(
                status_code=400,
                detail="Query arguments 'parameter', 'resolution' "
                "and 'period' are required",
            )

        stations = DwdObservationRequest(
            parameter=parameter, resolution=resolution, period=period, si_units=False
        )
    else:
        stations = DwdMosmixRequest(
            parameter=parameter, mosmix_type=mosmix_type, si_units=False
        )

    # Compare against None so that a coordinate of 0.0 is not mistaken
    # for a missing one.
    has_location = longitude is not None and latitude is not None

    if has_location and (rank or distance):
        if rank:
            results = stations.filter_by_rank(
                latitude=latitude, longitude=longitude, rank=rank
            )
        else:
            results = stations.filter_by_distance(
                latitude=latitude, longitude=longitude, distance=distance, unit="km"
            )
    else:
        results = stations.all()

    # Postprocessing.
    if sql is not None:
        # NOTE(review): the return value is discarded - presumably this
        # filters in place; confirm against the results class.
        results.filter_by_sql(sql)
    results.fill_gaps()

    return make_json_response(results.to_dict())
Ejemplo n.º 25
0
def run():
    """
    Usage:
      wetterdienst dwd observation stations --parameter=<parameter> --resolution=<resolution> --period=<period> [--station=<station>] [--latitude=<latitude>] [--longitude=<longitude>] [--rank=<rank>] [--distance=<distance>] [--sql=<sql>] [--format=<format>] [--target=<target>]
      wetterdienst dwd observation values --parameter=<parameter> --resolution=<resolution> [--station=<station>] [--period=<period>] [--date=<date>] [--tidy] [--sql=<sql>] [--format=<format>] [--target=<target>]
      wetterdienst dwd observation values --parameter=<parameter> --resolution=<resolution> --latitude=<latitude> --longitude=<longitude> [--period=<period>] [--rank=<rank>] [--distance=<distance>] [--tidy] [--date=<date>] [--sql=<sql>] [--format=<format>] [--target=<target>]
      wetterdienst dwd forecast stations [--parameter=<parameter>] [--mosmix-type=<mosmix-type>] [--date=<date>] [--station=<station>] [--latitude=<latitude>] [--longitude=<longitude>] [--rank=<rank>] [--distance=<distance>] [--sql=<sql>] [--format=<format>] [--target=<target>]
      wetterdienst dwd forecast values --parameter=<parameter> [--mosmix-type=<mosmix-type>] --station=<station> [--date=<date>] [--tidy] [--sql=<sql>] [--format=<format>] [--target=<target>]
      wetterdienst dwd about [parameters] [resolutions] [periods]
      wetterdienst dwd about coverage [--parameter=<parameter>] [--resolution=<resolution>] [--period=<period>]
      wetterdienst dwd about fields --parameter=<parameter> --resolution=<resolution> --period=<period> [--language=<language>]
      wetterdienst radar stations [--odim-code=<odim-code>] [--wmo-code=<wmo-code>] [--country-name=<country-name>]
      wetterdienst dwd radar stations
      wetterdienst restapi [--listen=<listen>] [--reload]
      wetterdienst explorer [--listen=<listen>] [--reload]
      wetterdienst --version
      wetterdienst (-h | --help)

    Options:
      --parameter=<parameter>       Parameter Set/Parameter, e.g. "kl" or "precipitation_height", etc.
      --resolution=<resolution>     Dataset resolution: "annual", "monthly", "daily", "hourly", "minute_10", "minute_1"
      --period=<period>             Dataset period: "historical", "recent", "now"
      --station=<station>           Comma-separated list of station identifiers
      --latitude=<latitude>         Latitude for filtering by geoposition.
      --longitude=<longitude>       Longitude for filtering by geoposition.
      --rank=<rank>                 Rank of nearby stations when filtering by geoposition.
      --distance=<distance>         Maximum distance in km when filtering by geoposition.
      --date=<date>                 Date for filtering data. Can be either a single date(time) or
                                    an ISO-8601 time interval, see https://en.wikipedia.org/wiki/ISO_8601#Time_intervals.
      --mosmix-type=<mosmix-type>   type of mosmix, either 'small' or 'large'
      --sql=<sql>                   SQL query to apply to DataFrame.
      --format=<format>             Output format. [Default: json]
      --target=<target>             Output target for storing data into different data sinks.
      --language=<language>         Output language. [Default: en]
      --version                     Show version information
      --debug                       Enable debug messages
      --listen=<listen>             HTTP server listen address.
      --reload                      Run service and dynamically reload changed files
      -h --help                     Show this screen


    Examples requesting observation stations:

      # Get list of all stations for daily climate summary data in JSON format
      wetterdienst dwd observation stations --parameter=kl --resolution=daily --period=recent

      # Get list of all stations in CSV format
      wetterdienst dwd observation stations --parameter=kl --resolution=daily --period=recent --format=csv

      # Get list of specific stations
      wetterdienst dwd observation stations --resolution=daily --parameter=kl --period=recent --station=1,1048,4411

      # Get list of specific stations in GeoJSON format
      wetterdienst dwd observation stations --resolution=daily --parameter=kl --period=recent --station=1,1048,4411 --format=geojson

    Examples requesting observation values:

      # Get daily climate summary data for specific stations
      wetterdienst dwd observation values --station=1048,4411 --parameter=kl --resolution=daily --period=recent

      # Get daily climate summary data for specific stations in CSV format
      wetterdienst dwd observation values --station=1048,4411 --parameter=kl --resolution=daily --period=recent --format=csv

      # Get daily climate summary data for specific stations in tidy format
      wetterdienst dwd observation values --station=1048,4411 --parameter=kl --resolution=daily --period=recent --tidy

      # Limit output to specific date
      wetterdienst dwd observation values --station=1048,4411 --parameter=kl --resolution=daily --period=recent --date=2020-05-01

      # Limit output to specified date range in ISO-8601 time interval format
      wetterdienst dwd observation values --station=1048,4411 --parameter=kl --resolution=daily --period=recent --date=2020-05-01/2020-05-05

      # The real power horse: Acquire data across historical+recent data sets
      wetterdienst dwd observation values --station=1048,4411 --parameter=kl --resolution=daily --date=1969-01-01/2020-06-11

      # Acquire monthly data for 2020-05
      wetterdienst dwd observation values --station=1048,4411 --parameter=kl --resolution=monthly --date=2020-05

      # Acquire monthly data from 2017-01 to 2019-12
      wetterdienst dwd observation values --station=1048,4411 --parameter=kl --resolution=monthly --date=2017-01/2019-12

      # Acquire annual data for 2019
      wetterdienst dwd observation values --station=1048,4411 --parameter=kl --resolution=annual --date=2019

      # Acquire annual data from 2010 to 2020
      wetterdienst dwd observation values --station=1048,4411 --parameter=kl --resolution=annual --date=2010/2020

      # Acquire hourly data
      wetterdienst dwd observation values --station=1048,4411 --parameter=air_temperature --resolution=hourly --period=recent --date=2020-06-15T12

    Examples requesting forecast stations:

      wetterdienst dwd forecast stations

    Examples requesting forecast values:

      wetterdienst dwd forecast values --parameter=ttt,ff --station=65510

    Examples using geospatial features:

      # Acquire stations and readings by geoposition, request specific number of nearby stations.
      wetterdienst dwd observation stations --resolution=daily --parameter=kl --period=recent --lat=49.9195 --lon=8.9671 --rank=5
      wetterdienst dwd observation values --resolution=daily --parameter=kl --period=recent --lat=49.9195 --lon=8.9671 --rank=5 --date=2020-06-30

      # Acquire stations and readings by geoposition, request stations within specific distance.
      wetterdienst dwd observation stations --resolution=daily --parameter=kl --period=recent --lat=49.9195 --lon=8.9671 --distance=25
      wetterdienst dwd observation values --resolution=daily --parameter=kl --period=recent --lat=49.9195 --lon=8.9671 --distance=25 --date=2020-06-30

    Examples using SQL filtering:

      # Find stations by state.
      wetterdienst dwd observation stations --parameter=kl --resolution=daily --period=recent --sql="SELECT * FROM data WHERE state='Sachsen'"

      # Find stations by name (LIKE query).
      wetterdienst dwd observation stations --parameter=kl --resolution=daily --period=recent --sql="SELECT * FROM data WHERE lower(station_name) LIKE lower('%dresden%')"

      # Find stations by name (regexp query).
      wetterdienst dwd observation stations --parameter=kl --resolution=daily --period=recent --sql="SELECT * FROM data WHERE regexp_matches(lower(station_name), lower('.*dresden.*'))"

      # Filter measurements: Display daily climate observation readings where the maximum temperature is below two degrees celsius.
      wetterdienst dwd observation values --station=1048,4411 --parameter=kl --resolution=daily --period=recent --sql="SELECT * FROM data WHERE temperature_air_max_200 < 2.0;"

      # Filter measurements: Same as above, but use tidy format.
      # FIXME: Currently, this does not work, see https://github.com/earthobservations/wetterdienst/issues/377.
      wetterdienst dwd observation values --station=1048,4411 --parameter=kl --resolution=daily --period=recent --sql="SELECT * FROM data WHERE parameter='temperature_air_max_200' AND value < 2.0;" --tidy

    Examples for inquiring metadata:

      # Display list of available parameters (air_temperature, precipitation, pressure, ...)
      wetterdienst dwd about parameters

      # Display list of available resolutions (10_minutes, hourly, daily, ...)
      wetterdienst dwd about resolutions

      # Display list of available periods (historical, recent, now)
      wetterdienst dwd about periods

      # Display coverage/correlation between parameters, resolutions and periods.
      # This can answer questions like ...
      wetterdienst dwd about coverage

      # Tell me all periods and resolutions available for 'air_temperature'.
      wetterdienst dwd about coverage --parameter=air_temperature

      # Tell me all parameters available for 'daily' resolution.
      wetterdienst dwd about coverage --resolution=daily

    Examples for exporting data to files:

      # Export list of stations into spreadsheet
      wetterdienst dwd observation stations --parameter=kl --resolution=daily --period=recent --target=file://stations.xlsx

      # Shortcut command for fetching readings
      alias fetch="wetterdienst dwd observation values --station=1048,4411 --parameter=kl --resolution=daily --period=recent"

      # Export readings into spreadsheet (Excel-compatible)
      fetch --target="file://observations.xlsx"

      # Export readings into Parquet format and display head of Parquet file
      fetch --target="file://observations.parquet"

      # Check Parquet file
      parquet-tools schema observations.parquet
      parquet-tools head observations.parquet

      # Export readings into Zarr format
      fetch --target="file://observations.zarr"

    Examples for exporting data to databases:

      # Shortcut command for fetching readings
      alias fetch="wetterdienst dwd observation values --station=1048,4411 --parameter=kl --resolution=daily --period=recent"

      # Store readings to DuckDB
      fetch --target="duckdb:///dwd.duckdb?table=weather"

      # Store readings to InfluxDB
      fetch --target="influxdb://localhost/?database=dwd&table=weather"

      # Store readings to CrateDB
      fetch --target="crate://localhost/?database=dwd&table=weather"

    Invoke the HTTP REST API service:

      # Start service on standard port, listening on http://localhost:7890.
      wetterdienst restapi

      # Start service on standard port and watch filesystem changes.
      # This is suitable for development.
      wetterdienst restapi --reload

      # Start service on public interface and specific port.
      wetterdienst restapi --listen=0.0.0.0:8890

    Invoke the Wetterdienst Explorer UI service:

      # Start service on standard port, listening on http://localhost:7891.
      wetterdienst explorer

      # Start service on standard port and watch filesystem changes.
      # This is suitable for development.
      wetterdienst explorer --reload

      # Start service on public interface and specific port.
      wetterdienst explorer --listen=0.0.0.0:8891

    """
    # NOTE: The docstring above is not only documentation. It is handed to
    # docopt below and defines the command line grammar, so every edit to the
    # "Usage" and "Options" sections changes the behaviour of the program.
    appname = f"{__appname__} {__version__}"

    # Read command line options.
    options = normalize_options(docopt(run.__doc__, version=appname))

    # Setup logging.
    log_level = logging.INFO
    if options.get("debug"):  # pragma: no cover
        log_level = logging.DEBUG
    setup_logging(log_level)

    # Run HTTP service.
    if options.restapi:  # pragma: no cover
        listen_address = options.listen
        log.info(f"Starting {appname}")
        log.info(f"Starting HTTP web service on http://{listen_address}")
        # Imported lazily, only when the service is actually requested.
        from wetterdienst.ui.restapi import start_service

        start_service(listen_address, reload=options.reload)
        return

    # Run UI service.
    if options.explorer:  # pragma: no cover
        listen_address = options.listen
        log.info(f"Starting {appname}")
        log.info(f"Starting UI web service on http://{listen_address}")
        # Imported lazily, only when the service is actually requested.
        from wetterdienst.ui.explorer.app import start_service

        start_service(listen_address, reload=options.reload)
        return

    # Handle radar data inquiry. Currently, "stations only".
    if options.radar:
        if options.dwd:
            data = DwdRadarSites().all()
        elif options.odim_code:
            data = OperaRadarSites().by_odimcode(options.odim_code)
        elif options.wmo_code:
            data = OperaRadarSites().by_wmocode(options.wmo_code)
        elif options.country_name:
            data = OperaRadarSites().by_countryname(options.country_name)
        else:
            data = OperaRadarSites().all()

        print(json.dumps(data, indent=4))
        return

    # Output domain information.
    if options.about:
        about(options)
        return

    # "values" is looked up by key throughout this function; attribute access
    # would presumably resolve to the mapping's ``values()`` method instead.
    wants_values = options["values"]

    # Sanity checks.
    if (wants_values or options.forecast) and options.format == "geojson":
        raise KeyError("GeoJSON format only available for stations output")

    # Acquire station list, also used for readings if required.
    # Filtering applied for distance (a.k.a. nearby) and pre-selected stations
    stations = None
    if options.observation:
        stations = DwdObservationRequest(
            parameter=read_list(options.parameter),
            resolution=options.resolution,
            period=options.period,
            tidy=options.tidy,
            si_units=False,
        )
    elif options.forecast:
        stations = DwdMosmixRequest(
            parameter=read_list(options.parameter),
            # Honour "--mosmix-type" when given; fall back to the former
            # hard-coded default otherwise.
            mosmix_type=options.get("mosmix_type") or DwdMosmixType.LARGE,
            tidy=options.tidy,
            si_units=False,
        )

    if options.latitude and options.longitude:
        latitude = float(options.latitude)
        longitude = float(options.longitude)
        # "--rank" takes precedence over "--distance" when both are given.
        if options.rank:
            results = stations.filter_by_rank(
                latitude=latitude,
                longitude=longitude,
                rank=int(options.rank),
            )
        elif options.distance:
            results = stations.filter_by_distance(
                latitude=latitude,
                longitude=longitude,
                distance=int(options.distance),
            )
        else:
            raise DocoptExit(
                "Geospatial queries need either --rank or --distance")

    elif options.station:
        results = stations.filter_by_station_id(read_list(options.station))

    else:
        results = stations.all()

    # For the "stations" subcommand the station result is the final result.
    # For "values", the readings of the selected stations are acquired.
    if not options.stations and wants_values:
        try:
            # TODO: Add stream-based processing here.
            results = results.values.all()
        except ValueError as ex:
            log.exception(ex)
            sys.exit(1)

    if results.df.empty:
        log.error("No data available for given constraints")
        sys.exit(1)

    # Filter readings by datetime expression.
    if wants_values and options.date:
        results.filter_by_date(options.date)

    # Apply filtering by SQL.
    if options.sql:
        if options.tidy:
            log.error("Combining SQL filtering with tidy format not possible")
            sys.exit(1)

        log.info(f"Filtering with SQL: {options.sql}")
        results.filter_by_sql(options.sql)

    # Emit to data sink, e.g. write to database.
    if options.target:
        results.to_target(options.target)
        return

    # Render to output format.
    try:
        if options.format == "json":
            output = results.to_json()
        elif options.format == "csv":
            output = results.to_csv()
        elif options.format == "geojson":
            output = results.to_geojson()
        else:
            raise KeyError("Unknown output format")

    except KeyError as ex:
        log.error(
            f'{ex}. Output format must be one of "json", "geojson", "csv".')
        sys.exit(1)

    print(output)
Ejemplo n.º 26
0
def dwd_values(
    kind: str,
    stations: str = Query(default=None),
    parameter: str = Query(default=None),
    resolution: str = Query(default=None),
    period: str = Query(default=None),
    mosmix_type: str = Query(default=None),
    date: str = Query(default=None),
    sql: str = Query(default=None),
    tidy: bool = Query(default=True),
):
    """
    Acquire data from DWD.

    # TODO: Obtain lat/lon distance/number information.

    :param kind:        Product kind, either "observation" or "mosmix".
    :param stations:    Comma-separated list of station identifiers.
    :param parameter:   Observation measure
    :param resolution:  Frequency/granularity of measurement interval
    :param period:      Recent or historical files
    :param mosmix_type: MOSMIX type. Either "small" or "large".
    :param date:        Date or date range
    :param sql:         SQL expression
    :param tidy:        Whether to return data in tidy format. Default: True.
    :return:            JSON response built from the acquired values.
    :raises HTTPException: With status 404 for an unknown ``kind`` and with
                        status 400 when a required query argument is missing.
    """
    if kind not in ["observation", "mosmix"]:
        # The exception has to be raised. Returning it would hand the exception
        # object to the response serializer instead of failing the request.
        raise HTTPException(
            status_code=404,
            detail=f"Unknown value for query argument 'kind={kind}'",
        )

    if stations is None:
        raise HTTPException(
            status_code=400, detail="Query argument 'stations' is required"
        )

    # Materialize the identifiers: a bare ``map`` object can be consumed only
    # once and would be empty on any second iteration.
    station_ids = [str(station) for station in read_list(stations)]

    if kind == "observation":
        if parameter is None or resolution is None or period is None:
            raise HTTPException(
                status_code=400,
                detail="Query arguments 'parameter', 'resolution' "
                "and 'period' are required",
            )

        # Data acquisition.
        request = DwdObservationRequest(
            parameter=parameter,
            resolution=resolution,
            period=period,
            tidy=tidy,
            si_units=False,
        )
    else:
        if parameter is None or mosmix_type is None:
            # Name both arguments which are actually checked above.
            raise HTTPException(
                status_code=400,
                detail="Query arguments 'parameter' and 'mosmix_type' "
                "are required",
            )

        # Forward "tidy" here as well, so that the query argument is honoured
        # for both product kinds.
        request = DwdMosmixRequest(
            parameter=parameter,
            mosmix_type=mosmix_type,
            tidy=tidy,
            si_units=False,
        )

    # Postprocessing.
    results = request.filter_by_station_id(station_id=station_ids).values.all()

    if date is not None:
        results.filter_by_date(date)

    if sql is not None:
        results.filter_by_sql(sql)

    # Round-trip through JSON text to obtain plain Python containers.
    data = json.loads(results.to_json())

    return make_json_response(data)
Ejemplo n.º 27
0
def test_dwd_observation_data_result_tidy_si():
    """Test for actual values (tidy) in metric units"""
    Settings.tidy = True
    Settings.humanize = False
    Settings.si_units = True

    request = DwdObservationRequest(
        parameter=[DwdObservationDataset.CLIMATE_SUMMARY],
        resolution=DwdObservationResolution.DAILY,
        start_date="1933-12-31",  # day without readings in the expected frame
        end_date="1934-01-01",  # first day with readings in the expected frame
    ).filter_by_station_id(station_id=(1048, ), )

    df = request.values.all().df

    assert list(df.columns.values) == [
        "station_id",
        "dataset",
        "parameter",
        "date",
        "value",
        "quality",
    ]

    # One entry per parameter: (name, value, quality) expected for 1934-01-01.
    # For 1933-12-31, both value and quality are expected to be missing.
    # ``np.nan`` is used instead of ``np.NaN``: the latter alias has been
    # removed in NumPy 2.0.
    second_day = [
        ("fx", pd.NA, np.nan),
        ("fm", pd.NA, np.nan),
        ("rsk", 0.2, 1),
        ("rskf", 8, 1),
        ("sdk", pd.NA, np.nan),
        ("shk_tag", 0, 1),
        ("nm", 100.0, 1),
        ("vpm", 640.0, 1),
        ("pm", 100860.0, 1),
        ("tmk", 273.65, 1),
        ("upm", 97.00, 1),
        ("txk", 273.84999999999997, 1),
        ("tnk", 273.34999999999997, 1),
        ("tgk", pd.NA, np.nan),
    ]

    # The tidy frame lists both days of one parameter before moving on to the
    # next parameter.
    parameters, values, qualities = [], [], []
    for name, value, quality in second_day:
        parameters += [name, name]
        values += [pd.NA, value]
        qualities += [np.nan, quality]

    row_count = len(parameters)
    both_days = [
        datetime(1933, 12, 31, tzinfo=pytz.UTC),
        datetime(1934, 1, 1, tzinfo=pytz.UTC),
    ]

    expected = pd.DataFrame(
        {
            "station_id": pd.Categorical(["01048"] * row_count),
            "dataset": pd.Categorical(["climate_summary"] * row_count),
            "parameter": pd.Categorical(parameters),
            "date": both_days * len(second_day),
            "value": pd.to_numeric(values, errors="coerce").astype(float),
            "quality": pd.Series(qualities, dtype=float),
        }, )

    assert_frame_equal(
        df,
        expected,
        # Needed since pandas 1.2?
        check_categorical=False,
    )
Ejemplo n.º 28
0
def test_tidy_up_data():
    """Check that one wide-format row is reshaped into the tidy layout"""
    Settings.tidy = True
    Settings.humanize = False
    Settings.si_units = True

    station_id = "01048"
    request = DwdObservationRequest(
        "kl",
        "daily",
        "historical",
        start_date="2019-01-23 00:00:00",
    ).filter_by_station_id((station_id, ))

    timestamp = pd.Timestamp("2019-01-23 00:00:00")

    # Single reading in wide format: the quality columns ("qn_3", "qn_4")
    # precede the parameter columns listed after them.
    wide_row = {
        "station_id": "01048",
        "date": timestamp,
        "qn_3": 10,
        "fx": 11.8,
        "fm": 5.8,
        "qn_4": 3,
        "rsk": 0.0,
        "rskf": 0.0,
        "sdk": 7.1,
        "shk_tag": 0.0,
        "nm": 2.3,
        "vpm": 3.2,
        "pm": 975.4,
        "tmk": -5.5,
        "upm": 79.17,
        "txk": -1.7,
        "tnk": -7.9,
        "tgk": -11.4,
    }
    wide_df = pd.DataFrame(
        {column: [cell] for column, cell in wide_row.items()})

    tidied = request.values.tidy_up_df(wide_df, request.parameter[0][1])
    organized = request.values._organize_df_columns(
        tidied, station_id, DwdObservationDataset.CLIMATE_SUMMARY)

    # Expected tidy rows: (parameter, value, quality)
    expected_rows = [
        ("fx", 11.8, 10),
        ("fm", 5.8, 10),
        ("rsk", 0.0, 3),
        ("rskf", 0.0, 3),
        ("sdk", 7.1, 3),
        ("shk_tag", 0.0, 3),
        ("nm", 2.3, 3),
        ("vpm", 3.2, 3),
        ("pm", 975.4, 3),
        ("tmk", -5.5, 3),
        ("upm", 79.17, 3),
        ("txk", -1.7, 3),
        ("tnk", -7.9, 3),
        ("tgk", -11.4, 3),
    ]
    names, values, qualities = zip(*expected_rows)
    row_count = len(expected_rows)

    expected = pd.DataFrame({
        "station_id": ["01048"] * row_count,
        "dataset": ["climate_summary"] * row_count,
        "parameter": list(names),
        "date": [timestamp] * row_count,
        "value": list(values),
        "quality": pd.Series(list(qualities), dtype=float),
    })

    assert_frame_equal(organized, expected)
Ejemplo n.º 29
0
def test_dwd_observation_data_result_tabular_metric():
    """Check tabular (non-tidy) values when SI units are enabled"""
    Settings.tidy = False
    Settings.humanize = False
    Settings.si_units = True

    stations = DwdObservationRequest(
        parameter=[DwdObservationDataset.CLIMATE_SUMMARY],
        resolution=DwdObservationResolution.DAILY,
        start_date="1933-12-31",  # few days before official start
        end_date="1934-01-01",  # few days after official start,
    ).filter_by_station_id(station_id=[1048], )

    df = stations.values.all().df

    # Expected entry per data column for 1934-01-01, in column order.
    # The entry for 1933-12-31 is expected to be missing in every column.
    second_day = {
        "qn_3": pd.NA,
        "fx": pd.NA,
        "fm": pd.NA,
        "qn_4": 1,
        "rsk": 0.2,
        "rskf": 8,
        "sdk": pd.NA,
        "shk_tag": 0,
        "nm": 100.0,
        "vpm": 640.0,
        "pm": 100860.0,
        "tmk": 273.65,
        "upm": 97.00,
        "txk": 273.84999999999997,
        "tnk": 273.34999999999997,
        "tgk": pd.NA,
    }

    assert list(df.columns.values) == [
        "station_id", "dataset", "date", *second_day
    ]

    expected_columns = {
        "station_id": pd.Categorical(["01048"] * 2),
        "dataset": pd.Categorical(["climate_summary"] * 2),
        "date": [
            datetime(1933, 12, 31, tzinfo=pytz.UTC),
            datetime(1934, 1, 1, tzinfo=pytz.UTC),
        ],
    }
    for column, reading in second_day.items():
        expected_columns[column] = pd.to_numeric([pd.NA, reading],
                                                 errors="coerce")

    assert_frame_equal(
        df,
        pd.DataFrame(expected_columns),
        check_categorical=False,
    )
Ejemplo n.º 30
0
def test_dwd_observation_data_result_tabular():
    """Verify the tabular (non-tidy) values returned for station 01048.

    The request asks for the daily climate summary between 1933-12-31 and
    1934-01-01 with ``tidy=False``, ``humanize=False`` and ``si_units=False``.
    Both the column order and the cell values of the resulting frame are
    compared against a hand-written reference, in which the first day carries
    no values at all.
    """

    def floats(*values):
        # Build a numeric column the same way for every float parameter.
        return pd.to_numeric(list(values), errors="coerce")

    def nullable_ints(*values):
        # Quality and snow-depth columns are expected as nullable Int64.
        return pd.Series(list(values), dtype=pd.Int64Dtype())

    query = DwdObservationRequest(
        parameter=[DwdObservationDataset.CLIMATE_SUMMARY],
        resolution=DwdObservationResolution.DAILY,
        # The reference frame holds no values for the first requested day ...
        start_date="1933-12-31",
        # ... and actual measurements for the second one.
        end_date="1934-01-01",
        tidy=False,
        humanize=False,
        si_units=False,
    )
    stations = query.filter_by_station_id(station_id=[1048])

    df = stations.values.all().df

    expected_columns = [
        "date",
        "station_id",
        "qn_3",
        "fx",
        "fm",
        "qn_4",
        "rsk",
        "rskf",
        "sdk",
        "shk_tag",
        "nm",
        "vpm",
        "pm",
        "tmk",
        "upm",
        "txk",
        "tnk",
        "tgk",
    ]
    assert list(df.columns.values) == expected_columns

    first_day = datetime(1933, 12, 31, tzinfo=pytz.UTC)
    second_day = datetime(1934, 1, 1, tzinfo=pytz.UTC)

    # Key order matters: it defines the column order of the reference frame.
    expected = pd.DataFrame({
        "date": [first_day, second_day],
        "station_id": pd.Categorical(["01048", "01048"]),
        "qn_3": nullable_ints(pd.NA, pd.NA),
        "fx": floats(pd.NA, pd.NA),
        "fm": floats(pd.NA, pd.NA),
        "qn_4": nullable_ints(pd.NA, 1),
        "rsk": floats(pd.NA, 0.2),
        "rskf": floats(pd.NA, 8),
        "sdk": floats(pd.NA, pd.NA),
        "shk_tag": nullable_ints(pd.NA, 0),
        "nm": floats(pd.NA, 8.0),
        "vpm": floats(pd.NA, 6.4),
        "pm": floats(pd.NA, 1008.60),
        "tmk": floats(pd.NA, 0.5),
        "upm": floats(pd.NA, 97.00),
        "txk": floats(pd.NA, 0.7),
        "tnk": floats(pd.NA, 0.2),
        "tgk": floats(pd.NA, pd.NA),
    })

    assert_frame_equal(df, expected)