Esempio n. 1
0
def test_filter_by_date_monthly():
    """Exercise ``filter_by_date`` with monthly resolution on a one-row frame."""
    # One record whose FROM_DATE/TO_DATE run from 2019-12-28 to 2020-01-28.
    record = {
        "STATION_ID": 1048,
        "PARAMETER": "climate_summary",
        "ELEMENT": "temperature_air_max_200",
        "FROM_DATE": parse_datetime("2019-12-28T00:00:00.000"),
        "TO_DATE": parse_datetime("2020-01-28T00:00:00.000"),
        "VALUE": 1.3,
        "QUALITY": None,
    }
    result = pd.DataFrame.from_dict([record])
    monthly = DWDObservationResolution.MONTHLY

    # The interval 2019-12/2020-01 is expected to keep the row.
    assert not result.dwd.filter_by_date("2019-12/2020-01", monthly).empty

    # A later interval and a later single year are both expected to drop it.
    for date in ("2020/2022", "2020"):
        assert result.dwd.filter_by_date(date, monthly).empty
Esempio n. 2
0
def test_filter_by_date_annual():
    """Exercise ``filter_by_date`` with annual resolution on a one-row frame."""
    # One record whose FROM_DATE/TO_DATE run from 2019-01-01 to 2019-12-31.
    record = {
        "STATION_ID": 1048,
        "PARAMETER": "climate_summary",
        "ELEMENT": "temperature_air_max_200",
        "FROM_DATE": parse_datetime("2019-01-01T00:00:00.000"),
        "TO_DATE": parse_datetime("2019-12-31T00:00:00.000"),
        "VALUE": 1.3,
        "QUALITY": None,
    }
    result = pd.DataFrame.from_dict([record])
    annual = TimeResolution.ANNUAL

    # A month interval inside 2019 is expected to keep the row.
    assert not result.dwd.filter_by_date("2019-05/2019-09", annual).empty

    # A later interval and a later single year are both expected to drop it.
    for date in ("2020/2022", "2020"):
        assert result.dwd.filter_by_date(date, annual).empty
Esempio n. 3
0
    def __init__(
        self,
        start_date: Union[None, str, Timestamp] = None,
        end_date: Union[None, str, Timestamp] = None,
    ) -> None:
        """
        Store the optional date bounds after normalising them.

        Truthy values that are not already ``datetime`` instances are run
        through ``parse_datetime``; afterwards each bound gets its ``tzinfo``
        replaced by ``self.tz``. Falsy bounds are stored as ``None``.

        :param start_date: start date for filtering stations for their available data
        :param end_date: end date for filtering stations for their available data
        :raises StartDateEndDateError: if both bounds are given and
            ``start_date`` is later than ``end_date``
        """
        # TODO: make datetimes timezone sensible
        # Step 1: parse anything that is set but not yet a datetime.
        if start_date and not isinstance(start_date, datetime):
            start_date = parse_datetime(start_date)
        if end_date and not isinstance(end_date, datetime):
            end_date = parse_datetime(end_date)

        # Step 2: attach self.tz; unset bounds collapse to None.
        if start_date:
            start_date = start_date.replace(tzinfo=self.tz)
        else:
            start_date = None
        if end_date:
            end_date = end_date.replace(tzinfo=self.tz)
        else:
            end_date = None

        # Step 3: the ordering check only applies when both bounds exist.
        if start_date and end_date and start_date > end_date:
            raise StartDateEndDateError(
                "'start_date' has to be before 'end_date'")

        self.start_date = start_date
        self.end_date = end_date
Esempio n. 4
0
def test_parse_datetime():
    """Compare ``parse_datetime`` results with datetimes tagged as UTC."""
    utc = timezone("UTC")
    # Input string -> expected wall-clock value (tzinfo is attached below).
    expectations = {
        "2020-05-01": datetime(2020, 5, 1, 0, 0),
        "2020-05-01T13:14:15": datetime(2020, 5, 1, 13, 14, 15),
        "2020-05-01T13": datetime(2020, 5, 1, 13, 0),
    }
    for text, naive in expectations.items():
        assert parse_datetime(text) == naive.replace(tzinfo=utc)
Esempio n. 5
0
    def filter_by_date(self, date: str,
                       time_resolution: TimeResolution) -> pd.DataFrame:
        """
        Select the rows of ``self.df`` that match a date or a date interval.

        ``date`` is either a single date or two dates joined by ``/``.
        Examples of accepted values:

        - 2020-05-01
        - 2020-06-15T12
        - 2020-05
        - 2019
        - 2020-05-01/2020-05-05
        - 2017-01/2019-12
        - 2010/2020

        For annual and monthly resolution the parsed date(s) are handed to
        ``mktimerange`` and rows are kept when their FROM_DATE is not before
        the range start and their TO_DATE is not after the range end. For any
        other resolution the DATE column is compared directly.

        :param date: single date or ``from/to`` interval as string
        :param time_resolution: resolution that selects the comparison mode
        :return: Filtered DataFrame
        """
        ranged_resolutions = (TimeResolution.ANNUAL, TimeResolution.MONTHLY)

        if "/" in date:
            # Interval: both ends are parsed separately.
            start_text, end_text = date.split("/")
            lower = parse_datetime(start_text)
            upper = parse_datetime(end_text)
            if time_resolution not in ranged_resolutions:
                frame = self.df
                stamps = frame[DWDMetaColumns.DATE.value]
                return frame[(lower <= stamps) & (stamps <= upper)]
            lower, upper = mktimerange(time_resolution, lower, upper)
        else:
            # Single date.
            moment = parse_datetime(date)
            if time_resolution not in ranged_resolutions:
                frame = self.df
                return frame[moment == frame[DWDMetaColumns.DATE.value]]
            lower, upper = mktimerange(time_resolution, moment)

        # Annual/monthly: compare against the FROM_DATE/TO_DATE columns.
        frame = self.df
        within_range = (lower <= frame[DWDMetaColumns.FROM_DATE.value]) & (
            frame[DWDMetaColumns.TO_DATE.value] <= upper)
        return frame[within_range]
Esempio n. 6
0
    def __init__(
        self,
        start_date: Union[None, str, Timestamp] = None,
        end_date: Union[None, str, Timestamp] = None,
    ) -> None:
        """
        Store the optional date bounds, parsing them where necessary.

        A truthy bound that is not already a ``datetime`` instance is passed
        through ``parse_datetime``; everything else is stored as given.

        :param start_date: lower bound, or a falsy value for "not set"
        :param end_date: upper bound, or a falsy value for "not set"
        :raises StartDateEndDateError: if both bounds are set and
            ``start_date`` is later than ``end_date``
        """
        if start_date and not isinstance(start_date, datetime):
            start_date = parse_datetime(start_date)
        if end_date and not isinstance(end_date, datetime):
            end_date = parse_datetime(end_date)

        # The ordering check only applies when both bounds are set.
        if start_date and end_date and start_date > end_date:
            raise StartDateEndDateError("'start_date' has to be before 'end_date'")

        self.start_date = start_date
        self.end_date = end_date
Esempio n. 7
0
def test_filter_by_date_annual():
    """Exercise ``filter_by_date`` with annual resolution on a one-row frame."""
    # Column-oriented input: one row running from 2019-01-01 to 2019-12-31.
    columns = {
        "STATION_ID": ["01048"],
        "PARAMETER_SET": ["climate_summary"],
        "PARAMETER": ["temperature_air_max_200"],
        "FROM_DATE": [parse_datetime("2019-01-01T00:00:00.000")],
        "TO_DATE": [parse_datetime("2019-12-31T00:00:00.000")],
        "VALUE": [1.3],
        "QUALITY": [None],
    }
    result = pd.DataFrame.from_dict(columns)
    annual = DWDObservationResolution.ANNUAL

    # A month interval inside 2019 is expected to keep the row.
    assert not result.dwd.filter_by_date("2019-05/2019-09", annual).empty

    # A later interval and a later single year are both expected to drop it.
    for date in ("2020/2022", "2020"):
        assert result.dwd.filter_by_date(date, annual).empty
Esempio n. 8
0
import pytest
from surrogate import surrogate

from wetterdienst.dwd.observations import (
    DWDObservationData,
    DWDObservationParameterSet,
    DWDObservationResolution,
    DWDObservationPeriod,
)
from wetterdienst.dwd.util import parse_datetime

# Station metadata fixture: a single station record with HAS_FILE set to False.
df_station = pd.DataFrame.from_dict(
    [
        {
            "STATION_ID": 19087,
            "FROM_DATE": parse_datetime("1957-05-01T00:00:00.000Z"),
            "TO_DATE": parse_datetime("1995-11-30T00:00:00.000Z"),
            "STATION_HEIGHT": 645.0,
            "LAT": 48.8049,
            "LON": 13.5528,
            "STATION_NAME": "Freyung vorm Wald",
            "STATE": "Bayern",
            "HAS_FILE": False,
        }
    ]
)
Esempio n. 9
0
def get_nearby_stations_by_number(
    latitude: float,
    longitude: float,
    num_stations_nearby: int,
    parameter: Union[Parameter, str],
    time_resolution: Union[TimeResolution, str],
    period_type: Union[PeriodType, str],
    minimal_available_date: Optional[Union[datetime, str]] = None,
    maximal_available_date: Optional[Union[datetime, str]] = None,
) -> pd.DataFrame:
    """
    Return the metadata of the stations nearest to a location.

    At most ``num_stations_nearby`` stations are returned. Only stations that
    have a file are considered, and the optional date bounds further restrict
    the candidates to stations whose FROM_DATE is not after
    ``minimal_available_date`` and whose TO_DATE is not before
    ``maximal_available_date``.

    :param latitude:                Latitude of location to search for nearest
                                    weather station
    :param longitude:               Longitude of location to search for nearest
                                    weather station
    :param num_stations_nearby:     Number of stations that should be nearby
    :param parameter:               Observation measure
    :param time_resolution:         Frequency/granularity of measurement interval
    :param period_type:             Recent or historical files
    :param minimal_available_date:  Start date of timespan where measurements
                                    should be available
    :param maximal_available_date:  End date of timespan where measurements
                                    should be available

    :return:                        DataFrame with the metadata rows of the
                                    nearest stations plus a column holding the
                                    distance to the requested location

    :raises ValueError:             if ``num_stations_nearby`` is not positive
                                    or the minimal date is after the maximal
                                    date
    :raises InvalidParameterCombination:
                                    if ``check_parameters`` rejects the
                                    combination of parameter, time resolution
                                    and period type
    """
    if num_stations_nearby <= 0:
        raise ValueError("'num_stations_nearby' has to be at least 1.")

    parameter = parse_enumeration_from_template(parameter, Parameter)
    time_resolution = parse_enumeration_from_template(time_resolution,
                                                      TimeResolution)
    period_type = parse_enumeration_from_template(period_type, PeriodType)

    if not check_parameters(parameter, time_resolution, period_type):
        raise InvalidParameterCombination(
            f"The combination of {parameter.value}, {time_resolution.value}, "
            f"{period_type.value} is invalid.")

    # Parse each bound on its own: a set value that is not yet a datetime is
    # converted, anything else is left untouched. Each bound must only look at
    # itself, otherwise a lone string bound stays unparsed or None gets parsed.
    if minimal_available_date and not isinstance(minimal_available_date,
                                                 datetime):
        minimal_available_date = parse_datetime(minimal_available_date)
    if maximal_available_date and not isinstance(maximal_available_date,
                                                 datetime):
        maximal_available_date = parse_datetime(maximal_available_date)

    if minimal_available_date and maximal_available_date:
        if minimal_available_date > maximal_available_date:
            raise ValueError("'minimal_available_date' has to be before "
                             "'maximal_available_date'")

    coords = Coordinates(np.array(latitude), np.array(longitude))

    metadata = metadata_for_climate_observations(parameter, time_resolution,
                                                 period_type)

    # Filter only for stations that have a file
    metadata = metadata[metadata[DWDMetaColumns.HAS_FILE.value].values]

    if minimal_available_date:
        metadata = metadata[
            metadata[DWDMetaColumns.FROM_DATE.value] <= minimal_available_date]

    if maximal_available_date:
        metadata = metadata[
            metadata[DWDMetaColumns.TO_DATE.value] >= maximal_available_date]

    # Positional indices returned below must line up with the filtered frame.
    metadata = metadata.reset_index(drop=True)

    distances, indices_nearest_neighbours = _derive_nearest_neighbours(
        metadata.LAT.values, metadata.LON.values, coords, num_stations_nearby)

    distances = pd.Series(distances)
    indices_nearest_neighbours = pd.Series(indices_nearest_neighbours)

    # If num_stations_nearby is higher than the actual amount of stations
    # further indices and distances are added which have to be filtered out
    num_results = min(metadata.shape[0], num_stations_nearby)
    distances = distances[:num_results]
    indices_nearest_neighbours = indices_nearest_neighbours[:num_results]

    # NOTE(review): scaling by KM_EARTH_RADIUS suggests the raw distances are
    # angular (radians) - confirm against _derive_nearest_neighbours.
    distances_km = np.array(distances * KM_EARTH_RADIUS)

    metadata_location = metadata.iloc[
        indices_nearest_neighbours, :].reset_index(drop=True)

    metadata_location[DWDMetaColumns.DISTANCE_TO_LOCATION.value] = distances_km

    if metadata_location.empty:
        logger.warning(f"No weather stations were found for coordinate "
                       f"{latitude}°N and {longitude}°E ")

    return metadata_location
Esempio n. 10
0
def test_parse_datetime():
    """Compare ``parse_datetime`` results with the expected naive datetimes."""
    # Input string -> expected datetime.
    expectations = {
        "2020-05-01": datetime(2020, 5, 1, 0, 0),
        "2020-05-01T13:14:15": datetime(2020, 5, 1, 13, 14, 15),
        "2020-05-01T13": datetime(2020, 5, 1, 13, 0),
    }
    for text, expected in expectations.items():
        assert parse_datetime(text) == expected
Esempio n. 11
0
import mock
import pandas as pd
import pytest
from surrogate import surrogate

from wetterdienst.dwd.observations import (
    DWDObservationData,
    DWDObservationParameterSet,
    DWDObservationResolution,
    DWDObservationPeriod,
)
from wetterdienst.dwd.util import parse_datetime

# Station metadata fixture: one row in column-oriented form (each key maps to
# a single-element list). STATION_ID is a string and HAS_FILE is False.
df_station = pd.DataFrame.from_dict({
    "STATION_ID": ["19087"],
    "FROM_DATE": [parse_datetime("1957-05-01T00:00:00.000Z")],
    "TO_DATE": [parse_datetime("1995-11-30T00:00:00.000Z")],
    "STATION_HEIGHT": [645.0],
    "LAT": [48.8049],
    "LON": [13.5528],
    "STATION_NAME": ["Freyung vorm Wald"],
    "STATE": ["Bayern"],
    "HAS_FILE": [False],
})

df_data = pd.DataFrame.from_dict({
    "STATION_ID": ["01048"],
    "PARAMETER_SET": ["CLIMATE_SUMMARY"],
    "PARAMETER": ["TEMPERATURE_AIR_MAX_200"],
    "DATE": [parse_datetime("2019-12-28T00:00:00.000Z")],
    "VALUE": [1.3],
Esempio n. 12
0
    def filter_by_date(
        self, date: str, resolution: DWDObservationResolution
    ) -> pd.DataFrame:
        """
        Select the rows of ``self.df`` that match a date or a date interval.

        ``date`` is either a single date or two dates joined by ``/``.
        Examples of accepted values:

        - 2020-05-01
        - 2020-06-15T12
        - 2020-05
        - 2019
        - 2020-05-01/2020-05-05
        - 2017-01/2019-12
        - 2010/2020

        For annual and monthly resolution the parsed date(s) are handed to
        ``mktimerange`` and rows are kept when their FROM_DATE is not before
        the range start and their TO_DATE is not after the range end. For any
        other resolution the DATE column is compared directly.

        :param date: single date or ``from/to`` interval as string
        :param resolution: resolution that selects the comparison mode
        :return: Filtered DataFrame
        """

        # TODO: datetimes should be aware of tz
        # TODO: resolution is not necessarily available and ideally filtering does not
        #  depend on it
        ranged_resolutions = (
            DWDObservationResolution.ANNUAL,
            DWDObservationResolution.MONTHLY,
        )

        if "/" in date:
            # Interval: both ends are parsed separately.
            start_text, end_text = date.split("/")
            lower = parse_datetime(start_text)
            upper = parse_datetime(end_text)
            if resolution not in ranged_resolutions:
                frame = self.df
                stamps = frame[DWDMetaColumns.DATE.value]
                return frame[(lower <= stamps) & (stamps <= upper)]
            lower, upper = mktimerange(resolution, lower, upper)
        else:
            # Single date.
            # TODO: make datetime tz aware
            moment = parse_datetime(date)
            if resolution not in ranged_resolutions:
                frame = self.df
                return frame[moment == frame[DWDMetaColumns.DATE.value]]
            lower, upper = mktimerange(resolution, moment)

        # Annual/monthly: compare against the FROM_DATE/TO_DATE columns.
        frame = self.df
        within_range = (lower <= frame[DWDMetaColumns.FROM_DATE.value]) & (
            frame[DWDMetaColumns.TO_DATE.value] <= upper
        )
        return frame[within_range]