Ejemplo n.º 1
0
    def __init__(
        self,
        parameter_set: Union[str, DWDObservationParameterSet],
        resolution: Union[str, DWDObservationResolution],
        period: Union[str, DWDObservationPeriod] = None,
        start_date: Union[None, str, Timestamp] = None,
        end_date: Union[None, str, Timestamp] = None,
    ):
        """
        Configure a station listing for one parameter set, resolution and period.

        :param parameter_set: parameter set str/enumeration
        :param resolution: resolution str/enumeration
        :param period: period str/enumeration
        :param start_date: start date to limit the stations
        :param end_date: end date to limit the stations
        :raises InvalidParameterCombination: if
            ``check_dwd_observations_parameter_set`` rejects the parsed
            combination
        """
        super().__init__(start_date=start_date, end_date=end_date)

        # Normalise all three arguments to enumeration members up front.
        parsed_set = parse_enumeration_from_template(
            parameter_set, DWDObservationParameterSet)
        parsed_resolution = parse_enumeration_from_template(
            resolution, DWDObservationResolution)
        parsed_period = parse_enumeration_from_template(
            period, DWDObservationPeriod)

        # TODO: move to _all and replace error with logging + empty dataframe
        combination_is_valid = check_dwd_observations_parameter_set(
            parsed_set, parsed_resolution, parsed_period)

        if not combination_is_valid:
            raise InvalidParameterCombination(
                f"The combination of {parsed_set.value}, {parsed_resolution.value}, "
                f"{parsed_period.value} is invalid.")

        self.parameter = parsed_set
        self.resolution = parsed_resolution
        self.period = parsed_period
Ejemplo n.º 2
0
def create_parameter_to_dataset_combination(
    parameter: Union[DwdObservationParameter, DwdObservationDataset],
    resolution: Resolution,
) -> Tuple[Union[DwdObservationParameter, DwdObservationDataset],
           DwdObservationDataset, ]:
    """Map a requested parameter to the dataset that has to be downloaded
    first in order to extract the parameter from it.

    The request is first parsed as a single parameter of the given
    resolution and looked up in ``PARAMETER_TO_DATASET_MAPPING``. If that
    fails, the request is parsed as a whole dataset instead.

    :param parameter: requested parameter or dataset
    :param resolution: resolution whose parameter enumeration and mapping
        are consulted
    :return: tuple of (mapped parameter, dataset) for a single parameter, or
        (dataset, dataset) when a whole dataset was requested
    :raises InvalidParameter: if the request can be parsed neither as a
        parameter of the resolution nor as a dataset
    """
    try:
        parameter_ = parse_enumeration_from_template(
            parameter, DwdObservationParameter[resolution.name])

        # Keep the caller's ``parameter`` untouched: the fallback below and
        # the error message must refer to what was actually requested, even
        # if resolving the dataset of the mapped value fails.
        mapped_parameter = PARAMETER_TO_DATASET_MAPPING[resolution][parameter_]

        return mapped_parameter, parse_enumeration_from_template(
            mapped_parameter.__class__.__name__, DwdObservationDataset)
    except (KeyError, InvalidEnumeration):
        try:
            parameter_set = parse_enumeration_from_template(
                parameter, DwdObservationDataset)

            return parameter_set, parameter_set
        except InvalidEnumeration as err:
            # Chain explicitly so the traceback shows the failed parse as cause.
            raise InvalidParameter(
                f"parameter {parameter} could not be parsed for "
                f"time resolution {resolution}") from err
Ejemplo n.º 3
0
def dwd_stations(
    parameter: str = Query(default=None),
    resolution: str = Query(default=None),
    period: str = Query(default=None),
    sql: str = Query(default=None),
):
    """
    Return the DWD observation station listing as a JSON response.

    :param parameter:   Parameter set, parsed to DWDObservationParameterSet
    :param resolution:  Resolution, parsed to DWDObservationResolution
    :param period:      Period, parsed to DWDObservationPeriod
    :param sql:         Optional SQL expression applied to the listing
    :return:            Result of ``make_json_response`` for the listing
    """
    parameter_set = parse_enumeration_from_template(
        parameter, DWDObservationParameterSet
    )
    parsed_resolution = parse_enumeration_from_template(
        resolution, DWDObservationResolution
    )
    parsed_period = parse_enumeration_from_template(period, DWDObservationPeriod)

    # Data acquisition.
    sites = DWDObservationSites(
        parameter_set=parameter_set,
        resolution=parsed_resolution,
        period=parsed_period,
    )
    stations = sites.all()

    # Postprocessing.
    stations = stations.dwd.lower()

    if sql is not None:
        stations = stations.io.sql(sql)

    payload = stations.io.to_dict()
    return make_json_response(payload)
Ejemplo n.º 4
0
    def __init__(
        self,
        parameter_set: Union[str, DWDObservationParameterSet],
        resolution: Union[str, DWDObservationResolution],
        period: Union[str, DWDObservationPeriod] = None,
        start_date: Union[None, str, Timestamp] = None,
        end_date: Union[None, str, Timestamp] = None,
    ):
        """
        Store the parsed parameter set, resolution and period of a request.

        :param parameter_set: parameter set str/enumeration
        :param resolution: resolution str/enumeration
        :param period: period str/enumeration
        :param start_date: forwarded to the parent initializer
        :param end_date: forwarded to the parent initializer
        :raises InvalidParameterCombination: if
            ``check_dwd_observations_parameter_set`` rejects the combination
        """
        super().__init__(start_date=start_date, end_date=end_date)

        # Parse in the same order as the arguments are declared.
        set_member = parse_enumeration_from_template(
            parameter_set, DWDObservationParameterSet
        )
        resolution_member = parse_enumeration_from_template(
            resolution, DWDObservationResolution
        )
        period_member = parse_enumeration_from_template(period, DWDObservationPeriod)

        is_valid = check_dwd_observations_parameter_set(
            set_member, resolution_member, period_member
        )
        if not is_valid:
            raise InvalidParameterCombination(
                f"The combination of {set_member.value}, {resolution_member.value}, "
                f"{period_member.value} is invalid."
            )

        self.parameter = set_member
        self.resolution = resolution_member
        self.period = period_member
Ejemplo n.º 5
0
def dwd_sites(
    product: str,
    parameter: str = Query(default=None),
    resolution: str = Query(default=None),
    period: str = Query(default=None),
    lon: float = Query(default=None),
    lat: float = Query(default=None),
    number_nearby: int = Query(default=None),
    max_distance_in_km: int = Query(default=None),
    sql: str = Query(default=None),
):
    """
    Return the station listing of a DWD product as a JSON response.

    :param product:             Either "observations" or "mosmix"
    :param parameter:           Parameter set (required for observations)
    :param resolution:          Resolution (required for observations)
    :param period:              Period (required for observations)
    :param lon:                 Longitude for a nearby-station search
    :param lat:                 Latitude for a nearby-station search
    :param number_nearby:       Number of nearby stations to return
    :param max_distance_in_km:  Search radius; used when ``number_nearby``
                                is not given
    :param sql:                 Optional SQL expression applied to the result
    :return:                    Result of ``make_json_response``
    :raises HTTPException:      404 for an unknown product, 400 when a
                                required observation argument is missing
    """
    if product not in ["observations", "mosmix"]:
        # Must be raised: a returned HTTPException is treated as a regular
        # response payload instead of producing a 404.
        raise HTTPException(status_code=404, detail=f"product {product} not found")

    # Data acquisition.
    if product == "observations":
        if parameter is None or resolution is None or period is None:
            raise HTTPException(
                status_code=400,
                detail="Query arguments 'parameter', 'resolution' "
                "and 'period' are required",
            )

        parameter = parse_enumeration_from_template(
            parameter, DWDObservationParameterSet
        )
        resolution = parse_enumeration_from_template(
            resolution, DWDObservationResolution
        )
        period = parse_enumeration_from_template(period, DWDObservationPeriod)

        sites = DWDObservationStations(
            parameter_set=parameter,
            resolution=resolution,
            period=period,
        )
    else:
        sites = DWDMosmixStations()

    # Compare against None: 0.0 is a valid longitude/latitude but falsy.
    has_position = lon is not None and lat is not None

    if has_position and (number_nearby or max_distance_in_km):
        if number_nearby:
            df = sites.nearby_number(
                latitude=lat, longitude=lon, num_stations_nearby=number_nearby
            )
        else:
            df = sites.nearby_radius(
                latitude=lat, longitude=lon, max_distance_in_km=max_distance_in_km
            )
    else:
        df = sites.all()

    # Postprocessing.
    df = df.dwd.lower()

    if sql is not None:
        df = df.io.sql(sql)

    # Missing values are replaced by -999 before serialisation.
    return make_json_response(df.fillna(-999).io.to_dict())
Ejemplo n.º 6
0
def test_parse_enumeration_from_template():
    """Member name and member value both resolve; an unknown string raises."""
    expected_member = DWDObservationParameterSet.CLIMATE_SUMMARY

    for template in ("climate_summary", "kl"):
        parsed = parse_enumeration_from_template(template,
                                                 DWDObservationParameterSet)
        assert parsed == expected_member

    with pytest.raises(InvalidEnumeration):
        parse_enumeration_from_template("climate", DWDObservationParameterSet)
Ejemplo n.º 7
0
    def _build_complete_df(self, df: pd.DataFrame, station_id: str, parameter: Enum, dataset: Enum) -> pd.DataFrame:
        """Method to build a complete df with all dates from start to end date included.

        The data is left-joined onto the frame returned by
        ``self._get_base_df(station_id)`` using the date column, so every row of
        that base frame is kept (presumably one row per expected date - verify
        against ``_get_base_df``).

        :param df: data to be completed
        :param station_id: station id passed on to ``_get_base_df``
        :param parameter: requested parameter (equal to ``dataset`` when a whole
            dataset was requested)
        :param dataset: dataset the parameter belongs to
        :return: merged frame; for tidy whole-dataset requests the concatenation
            of one merged frame per parameter group
        """
        # Single parameter requested, or untidy data: one merge is sufficient.
        if parameter != dataset or not self.sr.tidy:
            base_df = self._get_base_df(station_id)

            df = pd.merge(
                left=base_df,
                right=df,
                left_on=Columns.DATE.value,
                right_on=Columns.DATE.value,
                how="left",
            )

            if self.sr.tidy:
                # Rows added by the left join have no parameter yet, so the
                # whole column is overwritten with the requested parameter.
                df.loc[:, Columns.PARAMETER.value] = parameter.value
                df.loc[:, Columns.PARAMETER.value] = pd.Categorical(df.loc[:, Columns.PARAMETER.value])

            return df
        else:
            # Tidy data of a whole dataset: complete each parameter separately.
            data = []
            # NOTE: the loop rebinds both ``parameter`` and ``df``; the groupby
            # object is created from the original ``df`` before the first rebind.
            for parameter, group in df.groupby(Columns.PARAMETER.value, sort=False):
                # Resolve the group key to its enumeration member; where the
                # enumeration lives depends on ``_unique_dataset``.
                if self.sr._unique_dataset:
                    parameter_ = parse_enumeration_from_template(
                        parameter,
                        self.sr._parameter_base[self.sr.resolution.name],
                    )
                else:
                    parameter_ = parse_enumeration_from_template(
                        parameter,
                        self.sr._parameter_base[self.sr._dataset_accessor][dataset.name],
                    )

                df = pd.merge(
                    left=self._get_base_df(station_id),
                    right=group,
                    left_on=Columns.DATE.value,
                    right_on=Columns.DATE.value,
                    how="left",
                )

                # Fill the parameter for all rows, including those added by the join.
                df[Columns.PARAMETER.value] = parameter_.value.lower()

                data.append(df)

            # NOTE(review): pd.concat raises ValueError for an empty list, i.e.
            # when ``df`` contained no groups - confirm callers never pass that.
            return pd.concat(data)
Ejemplo n.º 8
0
def test_parse_enumeration_from_template():
    """Lower-case name, upper-case name and value all resolve; unknown strings raise."""
    expected_member = DwdObservationDataset.CLIMATE_SUMMARY

    for template in ("climate_summary", "CLIMATE_SUMMARY", "kl"):
        parsed = parse_enumeration_from_template(template, DwdObservationDataset)
        assert parsed == expected_member

    with pytest.raises(InvalidEnumeration):
        parse_enumeration_from_template("climate", DwdObservationDataset)
Ejemplo n.º 9
0
    def _parse_parameter_and_dataset(self, parameter) -> Tuple[Enum, Enum]:
        """Find the parameter by name or value and derive its dataset.

        The entries of ``self._parameter_base[self._dataset_accessor]`` that
        have a ``name`` attribute are compared case-insensitively against the
        request. On a match, the dataset is parsed from the class name of the
        matched entry.

        :param parameter: requested parameter as string
        :return: tuple of (parameter, dataset); the dataset is None when no
            parameter matched
        """
        candidates = {
            entry
            for entry in self._parameter_base[self._dataset_accessor]
            if hasattr(entry, "name")
        }

        # First candidate whose name or value equals the request.
        parameter_ = next(
            (
                entry
                for entry in candidates
                if entry.name.lower() == parameter.lower()
                or entry.value.lower() == parameter.lower()
            ),
            None,
        )

        if not parameter_:
            return parameter_, None

        dataset_ = parse_enumeration_from_template(
            parameter_.__class__.__name__, self._dataset_base)

        return parameter_, dataset_
Ejemplo n.º 10
0
    def __init__(
        self,
        parameter: Optional[Tuple[Union[str, DwdMosmixParameter], ...]],
        mosmix_type: Union[str, DwdMosmixType],
        start_issue: Optional[Union[str, datetime,
                                    DwdForecastDate]] = DwdForecastDate.LATEST,
        end_issue: Optional[Union[str, datetime]] = None,
        start_date: Optional[Union[str, datetime]] = None,
        end_date: Optional[Union[str, datetime]] = None,
        humanize: bool = True,
        tidy: bool = True,
        si_units: bool = True,
    ) -> None:
        """
        Set up a mosmix request.

        :param parameter: parameters, passed on to the parent initializer
        :param mosmix_type: mosmix type as str/enumeration
        :param start_issue: first issue to consider, or
            ``DwdForecastDate.LATEST`` for the latest issue only
        :param end_issue: last issue to consider; ignored for the latest issue
        :param start_date: passed on to the parent initializer
        :param end_date: passed on to the parent initializer
        :param humanize: stored as ``self.humanize``
        :param tidy: stored as ``self.tidy``
        :param si_units: passed on to the parent initializer
        """
        self.mosmix_type = parse_enumeration_from_template(
            mosmix_type, DwdMosmixType)

        super().__init__(
            parameter=parameter,
            start_date=start_date,
            end_date=end_date,
            resolution=Resolution.HOURLY,
            period=Period.FUTURE,
            si_units=si_units,
        )

        # Parse issue date if not set to fixed "latest" string
        if start_issue is DwdForecastDate.LATEST and end_issue:
            log.info(
                "end_issue will be ignored as 'latest' was selected for issue date"
            )

        if start_issue is not DwdForecastDate.LATEST:
            # Fill in the missing boundary, or fall back to the latest issue
            # when neither boundary is given.
            if not start_issue and not end_issue:
                start_issue = DwdForecastDate.LATEST
            elif not end_issue:
                end_issue = start_issue
            elif not start_issue:
                start_issue = end_issue

        # Checked again on purpose: the fallback above may just have selected
        # LATEST, which must not be handed to the datetime parsing.
        if start_issue is not DwdForecastDate.LATEST:
            start_issue = pd.to_datetime(
                start_issue, infer_datetime_format=True).floor("1H")
            end_issue = pd.to_datetime(end_issue,
                                       infer_datetime_format=True).floor("1H")

            # Shift start date and end date to 3, 9, 15, 21 hour format.
            # Compare the parsed enumeration so that a type given as string
            # is adjusted as well.
            if self.mosmix_type == DwdMosmixType.LARGE:
                start_issue = self.adjust_datetime(start_issue)
                end_issue = self.adjust_datetime(end_issue)

        # TODO: this should be replaced by the freq property in the main class
        if self.mosmix_type == DwdMosmixType.SMALL:
            self.resolution = Resolution.HOURLY
        else:
            self.resolution = Resolution.HOUR_6

        self.start_issue = start_issue
        self.end_issue = end_issue
        self.humanize = humanize
        self.tidy = tidy
Ejemplo n.º 11
0
    def describe_fields(cls,
                        dataset,
                        resolution,
                        period,
                        language: str = "en") -> dict:
        """
        Read the field description of a dataset from its description file.

        :param dataset: dataset as str/enumeration
        :param resolution: resolution as str/enumeration
        :param period: period as str/enumeration
        :param language: language of the description, "en" or "de"
        :return: result of ``read_description`` for the description file
        :raises ValueError: if ``language`` is neither "en" nor "de"
        :raises IndexError: if the file index contains no file with the
            prefix of the requested language
        """
        from wetterdienst.provider.dwd.observation.fields import read_description

        dataset = parse_enumeration_from_template(dataset,
                                                  DwdObservationDataset)
        resolution = parse_enumeration_from_template(resolution,
                                                     cls._resolution_base,
                                                     Resolution)
        period = parse_enumeration_from_template(period, cls._period_base,
                                                 Period)

        # Reject an unsupported language before the file index is built, so
        # an invalid call fails without doing that work first.
        if language == "en":
            file_prefix = "DESCRIPTION_"
        elif language == "de":
            file_prefix = "BESCHREIBUNG_"
        else:
            raise ValueError("Only language 'en' or 'de' supported")

        file_index = _create_file_index_for_dwd_server(
            dataset=dataset,
            resolution=resolution,
            period=period,
            cdc_base=DWDCDCBase.CLIMATE_OBSERVATIONS,
        )

        # Keep only the description files of the requested language.
        file_index = file_index[file_index[
            DwdColumns.FILENAME.value].str.contains(file_prefix)]

        # The first matching file is used.
        description_file_url = str(
            file_index[DwdColumns.FILENAME.value].tolist()[0])
        log.info(f"Acquiring field information from {description_file_url}")

        return read_description(description_file_url, language=language)
Ejemplo n.º 12
0
    def __new__(cls, provider: Union[Provider, str], kind: Union[Kind, str]):
        """
        Look up the API registered for a provider/kind pair.

        :param provider: provider of data e.g. DWD
        :param kind: kind of the data e.g. observation
        :return: the entry stored in ``cls.endpoints`` for the pair
        :raises ValueError: if no (truthy) entry is stored for the pair
        """
        # Both provider and kind should be fine (if not an exception is raised)
        provider_member = parse_enumeration_from_template(provider, Provider)
        kind_member = parse_enumeration_from_template(kind, Kind)

        apis_of_provider = cls.endpoints.get(provider_member, {})
        api = apis_of_provider.get(kind_member)

        if api:
            return api

        raise ValueError(
            f"No API available for provider {provider_member.value} and kind {kind_member.value}"
        )
Ejemplo n.º 13
0
 def resolution(self, res) -> None:
     """Store the resolution, parsing it unless the resolution type is fixed or undefined.

     :param res: resolution to store
     """
     # TODO: add functionality to parse arbitrary resolutions for cases where
     #  resolution has to be determined based on returned data
     stored_as_given = self._resolution_type in (
         ResolutionType.FIXED,
         ResolutionType.UNDEFINED,
     )

     if stored_as_given:
         self._resolution = res
         return

     self._resolution = parse_enumeration_from_template(
         res, self._resolution_base, Resolution
     )
Ejemplo n.º 14
0
    def _parse_dataset_and_parameter(
            self, parameter, dataset) -> Tuple[Optional[Enum], Optional[Enum]]:
        """
        Parse parameters for cases like
            - parameter=("climate_summary", ) or
            - parameter=(("precipitation_height", "climate_summary"))

        :param parameter: requested parameter
        :param dataset: requested dataset
        :return: tuple of (parameter, dataset); entries that could not be
            parsed are None
        """
        try:
            dataset_ = parse_enumeration_from_template(dataset,
                                                       self._dataset_base)
        except InvalidEnumeration:
            dataset_ = None

        # Without a dataset there is nothing to resolve the parameter against.
        if not dataset_:
            return None, dataset_

        if self._has_datasets and not self._unique_dataset:
            # Plain lookup, used only to verify that the dataset exists for
            # the current accessor.
            try:
                self._parameter_base[self._dataset_accessor][dataset_.name]
            except (KeyError, AttributeError):
                log.warning(
                    f"dataset {dataset_.name} is not a valid dataset for resolution {self._dataset_accessor}"
                )
                return None, None

        if parameter == dataset:
            # Case 1: entire dataset e.g. parameter="climate_summary"
            return dataset_, dataset_

        # Case 2: dataset and parameter e.g. (precipitation_height, climate_summary)
        try:
            parameter_ = parse_enumeration_from_template(
                parameter,
                self._parameter_base[self._dataset_accessor][dataset_.name])
        except (InvalidEnumeration, TypeError):
            parameter_ = None

        return parameter_, dataset_
Ejemplo n.º 15
0
def create_parameter_to_parameter_set_combination(
    parameter: Union[DWDObservationParameter, DWDObservationParameterSet],
    resolution: DWDObservationResolution,
) -> Tuple[Union[DWDObservationParameter, DWDObservationParameterSet],
           DWDObservationParameterSet, ]:
    """Map a requested parameter to the parameter set that has to be
    downloaded first in order to extract the parameter from it.

    Each parameter set of the resolution is tried in turn. If the request
    matches none of them, it is parsed as a whole parameter set instead.

    :param parameter: requested parameter or parameter set
    :param resolution: resolution whose parameter sets are searched
    :return: tuple of (parameter, parameter set)
    :raises InvalidParameter: if the request matches neither a parameter of
        the resolution nor a parameter set
    """
    # Public attributes of the resolution's structure are the parameter sets.
    structure_attributes = vars(
        DWDObservationParameterSetStructure[resolution.name])
    parameter_set_enums = [
        member for attribute, member in structure_attributes.items()
        if not attribute.startswith("_")
    ]

    for candidate in parameter_set_enums:
        candidate_name = candidate.__name__

        try:
            parameter_ = parse_enumeration_from_template(
                parameter,
                DWDObservationParameterSetStructure[resolution.name][
                    candidate_name],
            )
            parameter_set = parse_enumeration_from_template(
                candidate_name, DWDObservationParameterSet)
        except InvalidEnumeration:
            continue

        return parameter_, parameter_set

    # No single parameter matched: try the request as a whole parameter set.
    try:
        parameter_set = parse_enumeration_from_template(
            parameter, DWDObservationParameterSet)
    except InvalidEnumeration:
        pass
    else:
        return parameter_set, parameter_set

    raise InvalidParameter(f"parameter {parameter} could not be parsed for "
                           f"time resolution {resolution}")
Ejemplo n.º 16
0
    def __new__(cls, provider: str, network: str):
        """
        Look up the API registered for a provider/network pair.

        :param provider: provider of data e.g. DWD
        :param network: data network e.g. NOAAs ghcn
        :return: value of the matching entry in ``cls.endpoints``
        :raises ProviderError: if the provider cannot be parsed, the pair is
            not registered, or the registered value is falsy
        """
        # Both provider and network should be fine (if not an exception is raised)
        try:
            provider_member = parse_enumeration_from_template(provider, Provider)
            apis_of_provider = cls.endpoints[provider_member.name]
            endpoint = apis_of_provider[network.upper()].value

            # A falsy entry is treated like a missing one.
            if not endpoint:
                raise KeyError
        except (InvalidEnumeration, KeyError):
            raise ProviderError(
                f"No API available for provider {provider} and network {network}"
            )
        else:
            return endpoint
def test_compare_available_dwd_datasets():
    """Test to compare the datasets made available with wetterdienst with the ones actually available on the DWD CDC
    server instance.

    Every resolution/dataset folder found on the server (except the pairs listed in SKIP_DATASETS) has to be known to
    RESOLUTION_DATASET_MAPPING and DwdObservationParameter."""
    # similar to func list_remote_files_fsspec, but we don't want to get full depth
    fs = HTTPFileSystem(
        use_listings_cache=True,
        listings_expiry_time=CacheExpiry.TWELVE_HOURS.value,
        listings_cache_type="filedircache",
        listings_cache_location=cache_dir,
        client_kwargs=FSSPEC_CLIENT_KWARGS,
    )

    base_url = "https://opendata.dwd.de/climate_environment/CDC/observations_germany/climate/"

    files = fs.expand_path(base_url, recursive=True, maxdepth=3)

    df = pd.DataFrame({"files": files})

    # strip the base url in front and the last character of each path
    # (presumably a trailing slash - verify against the listing)
    df.files = df.files.str[len(base_url):-1]

    # filter resolution folders
    # (exactly one "/" left means the path has the form "<resolution>/<dataset>")
    df = df.loc[df.files.str.count("/") == 1, :]

    # split each path into its two parts and store them as separate columns
    df.loc[:, ["resolution", "dataset"]] = df.pop("files").str.split(
        "/").tolist()

    for _, (resolution, dataset) in df.iterrows():
        rd_pair = (resolution, dataset)

        if rd_pair in SKIP_DATASETS:
            continue

        resolution = parse_enumeration_from_template(resolution,
                                                     DwdObservationResolution,
                                                     Resolution)
        # lookup by value: raises if the folder name is no known dataset value
        dataset = DwdObservationDataset(dataset)

        assert dataset in RESOLUTION_DATASET_MAPPING[resolution].keys()
        assert DwdObservationParameter[resolution.name][dataset.name]
Ejemplo n.º 18
0
    def __init__(
        self,
        station_ids: List[Union[int, str]],
        parameters: List[
            Union[str, DWDObservationParameter, DWDObservationParameterSet]
        ],
        resolution: Union[str, DWDObservationResolution],
        periods: Optional[List[Union[str, DWDObservationPeriod]]] = None,
        start_date: Union[None, str, Timestamp, datetime] = None,
        end_date: Union[None, str, Timestamp, datetime] = None,
        storage: StorageAdapter = None,
        tidy_data: bool = True,
        humanize_column_names: bool = False,
    ) -> None:
        """
        Define a request for DWD observation data with mostly flexible arguments.

        Periods get special handling: as soon as start_date or end_date is
        given (or no period is given at all), all periods are used.

        :param station_ids:     Station ids as str or int, parsed to a list
                                of int
        :param parameters:      Observation measures; entries that cannot be
                                parsed are logged and skipped
        :param resolution:      Frequency/granularity of measurement interval
        :param periods:         Periods to use (optional); all periods are
                                used if None or if any date is given
        :param start_date:      Start of the requested time span; if only one
                                of the two dates is given, it is used for both
        :param end_date:        End of the requested time span; if only one
                                of the two dates is given, it is used for both
        :param storage:         Storage adapter.
        :param tidy_data:       Reshape DataFrame to a more tidy and
                                row-based version of data; always enabled for
                                more than one parameter or for any parameter
                                that is not a whole parameter set
        :param humanize_column_names: Replace column names by more
                                meaningful ones
        :raises ValueError:     if the station ids cannot be parsed to int
        :raises NoParametersFound: if none of the parameters could be parsed
        :raises StartDateEndDateError: if the start date is not smaller or
                                equal to the end date
        """

        try:
            self.station_ids = pd.Series(station_ids).astype(int).tolist()
        except ValueError:
            raise ValueError("List of station id's can not be parsed to integers.")

        self.resolution = parse_enumeration_from_template(
            resolution, DWDObservationResolution
        )

        self.parameters = []

        # pd.Series also wraps a single (non-list) parameter for iteration.
        for requested in pd.Series(parameters):
            try:
                parameter, parameter_set = create_parameter_to_parameter_set_combination(
                    requested, self.resolution
                )
            except InvalidParameter as e:
                log.info(str(e))
            else:
                self.parameters.append((parameter, parameter_set))

        if not self.parameters:
            raise NoParametersFound(f"No parameters could be parsed from {parameters}")

        # Explicit periods are only honoured when no date is given at all.
        if periods and not (start_date or end_date):
            parsed_periods = pd.Series(periods).apply(
                parse_enumeration_from_template, args=(DWDObservationPeriod,)
            )
            self.periods = parsed_periods.sort_values().tolist()
        else:
            self.periods = list(DWDObservationPeriod)

        if start_date or end_date:
            # If only one date given, make the other one equal
            start_date = start_date or end_date
            end_date = end_date or start_date

            self.start_date = Timestamp(dateparser.parse(str(start_date)))
            self.end_date = Timestamp(dateparser.parse(str(end_date)))

            if not self.start_date <= self.end_date:
                raise StartDateEndDateError(
                    "Error: 'start_date' must be smaller or equal to 'end_date'."
                )
        else:
            self.start_date = start_date
            self.end_date = end_date

        self.storage = storage

        # If more then one parameter requested, automatically tidy data
        has_single_parameter = any(
            not isinstance(parameter, DWDObservationParameterSet)
            for parameter, _ in self.parameters
        )
        self.tidy_data = (
            len(self.parameters) > 1 or has_single_parameter or tidy_data
        )
        self.humanize_column_names = humanize_column_names
Ejemplo n.º 19
0
def dwd_readings(
    product: str,
    station: str = Query(default=None),
    parameter: str = Query(default=None),
    resolution: str = Query(default=None),
    period: str = Query(default=None),
    mosmix_type: str = Query(default=None, alias="mosmix-type"),
    date: str = Query(default=None),
    sql: str = Query(default=None),
):
    """
    Acquire data from DWD.

    # TODO: Obtain lat/lon distance/number information.

    :param product:     string for product, either observations or mosmix
    :param station:     Comma-separated list of station identifiers.
    :param parameter:   Observation measure
    :param resolution:  Frequency/granularity of measurement interval
    :param period:      Recent or historical files
    :param mosmix_type: type of mosmix, either small or large
    :param date:        Date or date range
    :param sql:         SQL expression
    :return:            Result of ``make_json_response`` for the records
    :raises HTTPException: 404 for an unknown product, 400 when a required
                        query argument is missing
    """
    if product not in ["observations", "mosmix"]:
        # Must be raised: a returned HTTPException is treated as a regular
        # response payload instead of producing a 404.
        raise HTTPException(status_code=404, detail=f"product {product} not found")

    if station is None:
        raise HTTPException(
            status_code=400, detail="Query argument 'station' is required"
        )

    station_ids = map(str, read_list(station))

    if product == "observations":
        if parameter is None or resolution is None or period is None:
            raise HTTPException(
                status_code=400,
                detail="Query arguments 'parameter', 'resolution' "
                "and 'period' are required",
            )

        parameter = parse_enumeration_from_template(
            parameter, DWDObservationParameterSet
        )
        resolution = parse_enumeration_from_template(
            resolution, DWDObservationResolution
        )
        period = parse_enumeration_from_template(period, DWDObservationPeriod)

        # Data acquisition.
        readings = DWDObservationData(
            station_ids=station_ids,
            parameters=parameter,
            resolution=resolution,
            periods=period,
            tidy_data=True,
            humanize_parameters=True,
        )
    else:
        if mosmix_type is None:
            raise HTTPException(
                status_code=400, detail="Query argument 'mosmix_type' is required"
            )

        mosmix_type = parse_enumeration_from_template(mosmix_type, DWDMosmixType)

        readings = DWDMosmixData(station_ids=station_ids, mosmix_type=mosmix_type)

    # Postprocessing.
    df = readings.all()

    if date is not None:
        # NOTE(review): for product "mosmix" ``resolution`` is still the raw
        # query value (possibly None) here - confirm filter_by_date copes.
        df = df.dwd.filter_by_date(date, resolution)

    df = df.dwd.lower()

    if sql is not None:
        df = df.io.sql(sql)

    # Round trip through JSON to get records with ISO formatted dates.
    data = json.loads(df.to_json(orient="records", date_format="iso"))

    return make_json_response(data)
Ejemplo n.º 20
0
    def __init__(
        self,
        parameter: Union[str, DWDRadarParameter],
        site: Optional[DWDRadarSite] = None,
        fmt: Optional[DWDRadarDataFormat] = None,
        subset: Optional[DWDRadarDataSubset] = None,
        elevation: Optional[int] = None,
        start_date: Optional[Union[str, datetime, DWDRadarDate]] = None,
        end_date: Optional[Union[str, datetime, timedelta]] = None,
        resolution: Optional[Union[str, DWDRadarResolution]] = None,
        period: Optional[Union[str, DWDRadarPeriod]] = None,
    ) -> None:
        """
        :param parameter:       The radar moment to request
        :param site:            Site/station if parameter is one of
                                RADAR_PARAMETERS_SITES
        :param fmt:             Data format (BINARY, BUFR, HDF5)
        :param subset:          The subset (simple or polarimetric) for HDF5 data.
        :param elevation:       Elevation, only valid for the sweep volume
                                parameters (velocity/reflectivity)
        :param start_date:      Start date
        :param end_date:        End date
        :param resolution:      Time resolution for RadarParameter.RADOLAN_CDC,
                                either daily or hourly or 5 minutes.
        :param period:          Period type for RadarParameter.RADOLAN_CDC
        :return:                Nothing for now.
        :raises ValueError:     for an unsupported RADOLAN_CDC resolution, for
                                'elevation' combined with a parameter that
                                does not support it, or for the latest date
                                combined with RADOLAN_CDC or HDF5
        """

        # Convert parameters to enum types.
        self.parameter = parse_enumeration_from_template(parameter, DWDRadarParameter)
        self.site = parse_enumeration_from_template(site, DWDRadarSite)
        self.format = parse_enumeration_from_template(fmt, DWDRadarDataFormat)
        self.subset = parse_enumeration_from_template(subset, DWDRadarDataSubset)
        self.elevation = elevation and int(elevation)
        self.resolution = parse_enumeration_from_template(
            resolution, DWDRadarResolution
        )
        self.period = parse_enumeration_from_template(period, DWDRadarPeriod)

        # Sanity checks.
        #
        # All checks below use the parsed attributes instead of the raw
        # arguments, so that values given as strings are handled like their
        # enumeration counterparts.
        if self.parameter == DWDRadarParameter.RADOLAN_CDC:

            if self.resolution not in (
                DWDRadarResolution.DAILY,
                DWDRadarResolution.HOURLY,
                DWDRadarResolution.MINUTE_5,
            ):
                raise ValueError(
                    "RADOLAN_CDC only supports daily, hourly and 5 minutes resolutions"
                )

        elevation_parameters = [
            DWDRadarParameter.SWEEP_VOL_VELOCITY_H,
            DWDRadarParameter.SWEEP_VOL_REFLECTIVITY_H,
        ]
        if self.elevation is not None and self.parameter not in elevation_parameters:
            raise ValueError(
                f"Argument 'elevation' only valid for parameter={elevation_parameters}"
            )

        if start_date == DWDRadarDate.LATEST:

            # RADOLAN_CDC folders do not have "-latest-" files.
            if self.parameter == DWDRadarParameter.RADOLAN_CDC:
                raise ValueError("RADOLAN_CDC data has no '-latest-' files")

            # HDF5 folders do not have "-latest-" files.
            if self.format == DWDRadarDataFormat.HDF5:
                raise ValueError("HDF5 data has no '-latest-' files")

        if start_date == DWDRadarDate.CURRENT and not self.period:
            self.period = DWDRadarPeriod.RECENT

        # Evaluate "RadarDate.MOST_RECENT" for "start_date".
        #
        # HDF5 folders do not have "-latest-" files, so we will have to synthesize them
        # appropriately by going back to the second last volume of 5 minute intervals.
        #
        # The reason for this is that when requesting sweep data in HDF5 format at
        # e.g. 15:03, not all files will be available on the DWD data repository for
        # the whole volume (e.g. covering all elevation levels) within the time range
        # of 15:00-15:04:59 as they apparently will be added incrementally while the
        # scan is performed.
        #
        # So, we will be better off making the machinery retrieve the latest "full"
        # volume by addressing the **previous** volume. So, when requesting data at
        # 15:03, it will retrieve 14:55:00-14:59:59.
        #
        if (
            self.format == DWDRadarDataFormat.HDF5
            and start_date == DWDRadarDate.MOST_RECENT
        ):
            start_date = datetime.utcnow() - timedelta(minutes=5)
            end_date = None

        # For RADOLAN_CDC the most recent data is addressed 50 minutes back.
        if (
            start_date == DWDRadarDate.MOST_RECENT
            and self.parameter == DWDRadarParameter.RADOLAN_CDC
        ):
            start_date = datetime.utcnow() - timedelta(minutes=50)
            end_date = None

        # Evaluate "RadarDate.CURRENT" for "start_date".
        if start_date == DWDRadarDate.CURRENT:
            start_date = datetime.utcnow()
            end_date = None

        # Evaluate "RadarDate.LATEST" for "start_date".
        if start_date == DWDRadarDate.LATEST:
            self.start_date = start_date
            self.end_date = None

        # Evaluate any datetime for "start_date".
        else:
            self.start_date = pd.to_datetime(start_date, infer_datetime_format=True)
            self.end_date = end_date
            self.adjust_datetimes()

        log.info(
            f"DWDRadarRequest with {self.parameter}, {self.site}, "
            f"{self.format}, {self.resolution} "
            f"for {self.start_date}/{self.end_date}"
        )
Ejemplo n.º 21
0
def dwd_readings(
    station: str = Query(default=None),
    parameter: str = Query(default=None),
    resolution: str = Query(default=None),
    period: str = Query(default=None),
    date: str = Query(default=None),
    sql: str = Query(default=None),
):
    """
    Acquire data from DWD.

    # TODO: Obtain lat/lon distance/number information.

    :param station:     Comma-separated list of station identifiers.
    :param parameter:   Observation measure
    :param resolution:  Frequency/granularity of measurement interval
    :param period:      Recent or historical files
    :param date:        Date or date range
    :param sql:         SQL expression
    :return:            Response built by ``make_json_response`` from the
                        resulting records.
    :raises HTTPException: With status code 400 if a required query argument
                        is missing or a station identifier is not an integer.
    """

    if station is None:
        raise HTTPException(
            status_code=400, detail="Query argument 'station' is required"
        )

    if parameter is None or resolution is None or period is None:
        raise HTTPException(
            status_code=400,
            detail="Query arguments 'parameter', 'resolution' "
            "and 'period' are required",
        )

    # Convert eagerly: a lazy ``map`` would defer the ``ValueError`` of a
    # malformed identifier to wherever the ids are first consumed, where it
    # would surface as a server error instead of a client error.
    try:
        station_ids = [int(station_id) for station_id in read_list(station)]
    except ValueError as e:
        raise HTTPException(
            status_code=400,
            detail="Query argument 'station' must be a comma-separated "
            "list of integers",
        ) from e

    parameter = parse_enumeration_from_template(parameter, DWDObservationParameterSet)
    resolution = parse_enumeration_from_template(resolution, DWDObservationResolution)
    period = parse_enumeration_from_template(period, DWDObservationPeriod)

    # Data acquisition.
    observations = DWDObservationData(
        station_ids=station_ids,
        parameters=parameter,
        resolution=resolution,
        periods=period,
        tidy_data=True,
        humanize_column_names=True,
    )

    # Postprocessing.
    df = observations.collect_safe()

    if date is not None:
        df = df.dwd.filter_by_date(date, resolution)

    df = df.dwd.lower()

    if sql is not None:
        df = df.io.sql(sql)

    # Round-trip through JSON text so dates are serialized in ISO format.
    data = json.loads(df.to_json(orient="records", date_format="iso"))
    return make_json_response(data)
Ejemplo n.º 22
0
    def __init__(
        self,
        parameter: Tuple[Union[str, Enum]],
        resolution: Resolution,
        period: Period,
        start_date: Optional[Union[str, datetime, pd.Timestamp]] = None,
        end_date: Optional[Union[str, datetime, pd.Timestamp]] = None,
    ) -> None:
        """
        Set up the request from its arguments and a copy of the global Settings.

        :param parameter: requested parameter(s)
        :param resolution: requested resolution
        :param period: requested period(s)
        :param start_date: Start date for filtering stations_result for their available data
        :param end_date:   End date for filtering stations_result for their available data
        :raises NoParametersFound: if none of the given parameters could be parsed
        """
        # Work on a snapshot of the settings taken at construction time.
        settings = copy(Settings)
        super().__init__()

        self.resolution = parse_enumeration_from_template(
            resolution, self._resolution_base, Resolution
        )
        self.period = self._parse_period(period)

        parsed_dates = self.convert_timestamps(start_date, end_date)
        self.start_date, self.end_date = parsed_dates

        self.parameter = self._parse_parameter(parameter)
        if not self.parameter:
            raise NoParametersFound(
                "no valid parameters could be parsed from given argument"
            )

        self.humanize = settings.humanize

        # Tidy output is forced as soon as one requested item is not a member
        # of the dataset enumeration.
        tidy = settings.tidy
        if self._has_datasets and not tidy:
            outside_dataset_base = [
                item not in self._dataset_base for item, _ in self.parameter
            ]
            tidy = any(outside_dataset_base)
        self.tidy = tidy

        self.si_units = settings.si_units

        # The following options only take effect together with tidy output.
        self.skip_empty = self.tidy and settings.skip_empty
        self.skip_threshold = settings.skip_threshold
        self.dropna = self.tidy and settings.dropna

        if not tidy:
            if settings.skip_empty:
                log.warning(
                    "option 'skip_empty' is only available with option 'tidy' and is thus ignored in this request."
                )
            if settings.dropna:
                log.warning(
                    "option 'dropna' is only available with option 'tidy' and is thus ignored in this request."
                )

        # Dynamic resolutions carry an extra, initially unset, frequency.
        if self.resolution == Resolution.DYNAMIC:
            self._dynamic_frequency = None

        log.info(f"Processing request {self.__repr__()}")
Ejemplo n.º 23
0
 def dynamic_frequency(self, df) -> None:
     """
     Store the frequency parsed from ``df``.

     A falsy ``df`` is ignored and leaves the stored value untouched.
     """
     if not df:
         return

     self._dynamic_frequency = parse_enumeration_from_template(df, Frequency)
Ejemplo n.º 24
0
    def __init__(
        self,
        parameter: Optional[Tuple[Union[str, DwdMosmixParameter], ...]],
        mosmix_type: Union[str, DwdMosmixType],
        start_issue: Optional[Union[str, datetime,
                                    DwdForecastDate]] = DwdForecastDate.LATEST,
        end_issue: Optional[Union[str, datetime]] = None,
        start_date: Optional[Union[str, datetime]] = None,
        end_date: Optional[Union[str, datetime]] = None,
    ) -> None:
        """
        Set up a MOSMIX request.

        :param parameter: parameter(s) to be collected
        :param mosmix_type: mosmix type, either small or large
        :param start_issue: start of issue of mosmix which should be caught (Mosmix run at time XX:YY),
            an empty value is treated like "latest"
        :param end_issue: end of issue, defaults to start_issue and is ignored
            when "latest" is selected
        :param start_date: start date for filtering returned dataframe
        :param end_date: end date
        """
        self.mosmix_type = parse_enumeration_from_template(
            mosmix_type, DwdMosmixType)

        super().__init__(
            parameter=parameter,
            start_date=start_date,
            end_date=end_date,
            resolution=Resolution.HOURLY,
            period=Period.FUTURE,
        )

        if not start_issue:
            start_issue = DwdForecastDate.LATEST

        try:
            start_issue = parse_enumeration_from_template(
                start_issue, DwdForecastDate)
        except InvalidParameter:
            # Not a DwdForecastDate keyword: handled as a date value below.
            pass

        # Parse issue date if not set to fixed "latest" string
        if start_issue is DwdForecastDate.LATEST:
            if end_issue:
                log.info(
                    "end_issue will be ignored as 'latest' was selected for issue date"
                )
        else:
            # start_issue is always set at this point (empty values were
            # replaced by LATEST above), so only end_issue needs a fallback.
            if not end_issue:
                end_issue = start_issue

            start_issue = pd.to_datetime(
                start_issue, infer_datetime_format=True).floor("1H")
            end_issue = pd.to_datetime(end_issue,
                                       infer_datetime_format=True).floor("1H")

            # Shift start date and end date to 3, 9, 15, 21 hour format.
            # Compare the parsed enumeration, not the raw argument, so that
            # a mosmix type given as string is adjusted as well.
            if self.mosmix_type == DwdMosmixType.LARGE:
                start_issue = self.adjust_datetime(start_issue)
                end_issue = self.adjust_datetime(end_issue)

        # TODO: this should be replaced by the freq property in the main class
        if self.mosmix_type == DwdMosmixType.SMALL:
            self.resolution = Resolution.HOURLY
        else:
            self.resolution = Resolution.HOUR_6

        self.start_issue = start_issue
        self.end_issue = end_issue
Ejemplo n.º 25
0
def get_stations(
    api,
    parameter: List[str],
    resolution: str,
    period: List[str],
    date: Optional[str],
    issue: str,
    all_,
    station_id: List[str],
    name: str,
    coordinates: str,
    rank: int,
    distance: float,
    bbox: str,
    sql: str,
    si_units: bool,
    tidy: bool,
    humanize: bool,
    skip_empty: bool,
    skip_threshold: float,
    dropna: bool,
) -> StationsResult:
    """
    Core function for querying stations_result via cli and restapi

    Exactly one station filter is applied; the first one given wins in the
    order: all_, station_id, name, coordinates with rank, coordinates with
    distance, bbox, sql.

    :param api: request class that is instantiated for the query
    :param parameter: parameters, passed through ``unpack_parameters``
    :param resolution: resolution; for DWD forecasts the mosmix type
    :param period: period(s), only passed on for apis with multiple periods
    :param date: date string, split into start and end date
    :param issue: start issue, only passed on for DWD forecasts
    :param all_: return all stations
    :param station_id: station ids to filter by
    :param name: station name to filter by
    :param coordinates: "latitude,longitude", used with rank or distance
    :param rank: number of stations for the coordinates filter
    :param distance: maximum distance for the coordinates filter
    :param bbox: "left,bottom,right,top" bounding box
    :param sql: sql expression to filter by
    :param si_units: written to Settings.si_units
    :param tidy: written to Settings.tidy
    :param humanize: written to Settings.humanize
    :param skip_empty: written to Settings.skip_empty
    :param skip_threshold: written to Settings.skip_threshold
    :param dropna: written to Settings.dropna
    :return: result of the selected station filter
    :raises TypeError: if the api requires start and end date but none is given
    :raises ValueError: if coordinates or bbox are malformed
    :raises KeyError: if no station filter is given
    """
    # TODO: move this into Request core
    start_date, end_date = None, None
    if date:
        # TODO: use rather network here
        if api == DwdMosmixRequest:
            mosmix_type = DwdMosmixType[resolution.upper()]

            if mosmix_type == DwdMosmixType.SMALL:
                res = Resolution.HOURLY
            else:
                res = Resolution.HOUR_6
        else:
            res = parse_enumeration_from_template(resolution,
                                                  api._resolution_base,
                                                  Resolution)

        # Split date string into start and end date string
        start_date, end_date = create_date_range(date=date, resolution=res)

    if api._data_range == DataRange.LOOSELY and not start_date and not end_date:
        # TODO: use another property "network" on each class
        raise TypeError(
            f"Combination of provider {api.provider.name} and network {api.kind.name} requires start and end date"
        )

    # Todo: We may have to apply other measures to allow for
    #  different request initializations
    # DWD Mosmix has fixed resolution and rather uses SMALL
    # and large for the different datasets

    kwargs = {
        "parameter": unpack_parameters(parameter),
        "start_date": start_date,
        "end_date": end_date,
    }
    if api.provider == Provider.DWD and api.kind == Kind.FORECAST:
        kwargs["mosmix_type"] = resolution
        kwargs["start_issue"] = issue
    elif api._resolution_type == ResolutionType.MULTI:
        kwargs["resolution"] = resolution

    if api._period_type == PeriodType.MULTI:
        kwargs["period"] = period

    with Settings:
        Settings.tidy = tidy
        Settings.humanize = humanize
        Settings.si_units = si_units
        Settings.skip_empty = skip_empty
        Settings.skip_threshold = skip_threshold
        Settings.dropna = dropna

        r = api(**kwargs)

    if all_:
        return r.all()

    elif station_id:
        return r.filter_by_station_id(station_id)

    elif name:
        return r.filter_by_name(name)

    # Coordinates alone are not a valid filter: without rank or distance the
    # request falls through to the KeyError at the end.
    elif coordinates and (rank or distance):
        try:
            lat, lon = coordinates.split(",")
        except ValueError as e:
            raise ValueError(
                "coordinates requires two floats separated by comma") from e

        if rank:
            return r.filter_by_rank(
                latitude=float(lat),
                longitude=float(lon),
                rank=rank,
            )

        return r.filter_by_distance(
            latitude=float(lat),
            longitude=float(lon),
            distance=distance,
        )

    elif bbox:
        try:
            left, bottom, right, top = bbox.split(",")
        except ValueError as e:
            raise ValueError(
                "bbox requires four floats separated by comma") from e

        return r.filter_by_bbox(
            left=float(left),
            bottom=float(bottom),
            right=float(right),
            top=float(top),
        )

    elif sql:
        return r.filter_by_sql(sql)

    else:
        param_options = [
            "all (boolean)",
            "station (string)",
            "name (string)",
            "coordinates (float,float) and rank (integer)",
            "coordinates (float,float) and distance (float)",
            "bbox (left float, bottom float, right float, top float)",
            "sql (string)",
        ]
        raise KeyError(
            f"Give one of the parameters: {', '.join(param_options)}")
Ejemplo n.º 26
0
    def __init__(
        self,
        station_ids: Tuple[str],
        mosmix_type: Union[str, DWDMosmixType],
        parameters: Optional[Tuple[Union[str, DWDMosmixParameter]]] = None,
        start_issue: Optional[
            Union[str, datetime, DWDForecastDate]
        ] = DWDForecastDate.LATEST,
        end_issue: Optional[Union[str, datetime]] = None,
        start_date: Optional[Union[str, datetime]] = None,
        end_date: Optional[Union[str, datetime]] = None,
        humanize_parameters: bool = False,
        tidy_data: bool = True,
    ) -> None:
        """
        Set up a MOSMIX forecast data request.

        :param station_ids: station ids which are being queried from the MOSMIX forecast
        :param mosmix_type: type of forecast, either small (MOSMIX-S) or large
        (MOSMIX-L), as string or enumeration
        :param parameters: optional parameters for which the forecasts are filtered
        :param start_issue: start date of the MOSMIX forecast, can be used in
        combination with end_issue to query multiple MOSMIX forecasts, or instead used
        with enumeration to only query LATEST MOSMIX forecast; if neither start_issue
        nor end_issue is given, LATEST is used
        :param end_issue: end issue of MOSMIX forecast, can be used to query multiple
        MOSMIX forecasts available on the server
        :param start_date: start date to limit the returned data to specified datetimes
        :param end_date: end date to limit the returned data to specified datetimes
        :param humanize_parameters: boolean if parameters shall be renamed to human
        readable names
        :param tidy_data: boolean if pandas.DataFrame shall be tidied and
        values put in rows
        """
        # Use all parameters if none are given
        parameters = parameters or [*self._parameter_base]

        super(DWDMosmixData, self).__init__(
            station_ids=station_ids,
            parameters=parameters,
            start_date=start_date,
            end_date=end_date,
            humanize_parameters=humanize_parameters,
        )
        self.mosmix_type = parse_enumeration_from_template(mosmix_type, DWDMosmixType)

        # Without any issue information fall back to the latest forecast. This
        # has to happen before the datetime parsing below, which cannot handle
        # the LATEST enumeration.
        if not start_issue and not end_issue:
            start_issue = DWDForecastDate.LATEST

        # Parse issue date if not set to fixed "latest" string
        if start_issue is DWDForecastDate.LATEST:
            if end_issue:
                log.info(
                    "end_issue will be ignored as 'latest' was selected for issue date"
                )
        else:
            # At least one of both is set here; mirror it onto the other one.
            if not end_issue:
                end_issue = start_issue
            elif not start_issue:
                start_issue = end_issue

            start_issue = pd.to_datetime(start_issue, infer_datetime_format=True).floor(
                "1H"
            )
            end_issue = pd.to_datetime(end_issue, infer_datetime_format=True).floor(
                "1H"
            )

            # Shift start date and end date to 3, 9, 15, 21 hour format.
            # Compare the parsed enumeration, not the raw argument, so that
            # a mosmix type given as string is adjusted as well.
            if self.mosmix_type == DWDMosmixType.LARGE:
                start_issue = self.adjust_datetime(start_issue)
                end_issue = self.adjust_datetime(end_issue)

        self.start_issue = start_issue
        self.end_issue = end_issue
        self.humanize_parameters = humanize_parameters
        self.tidy_data = tidy_data

        # TODO: this should be replaced by the freq property in the main class
        if self.mosmix_type == DWDMosmixType.SMALL:
            self.freq = "1H"  # short forecasts released every hour
        else:
            self.freq = "6H"

        self.kml = KMLReader(station_ids=self.station_ids, parameters=self.parameters)
Ejemplo n.º 27
0
    def __init__(
        self,
        station_ids: List[Union[int, str]],
        parameters: List[Union[str, DWDObservationParameter,
                               DWDObservationParameterSet]],
        resolution: Union[str, DWDObservationResolution],
        periods: Optional[List[Union[str, DWDObservationPeriod]]] = None,
        start_date: Optional[Union[str, Timestamp, datetime]] = None,
        end_date: Optional[Union[str, Timestamp, datetime]] = None,
        tidy_data: bool = True,
        humanize_parameters: bool = False,
    ) -> None:
        """
        Define a request for DWD observation data.

        Periods get special treatment: when none are given they are derived
        from the start date if there is one, otherwise every period is used.
        Explicitly given periods are parsed and sorted, and date filtering is
        then limited to those periods.

        :param station_ids:         station identifiers as str or int, converted
                                    to zero-padded strings of width 5
        :param parameters:          Observation measure
        :param resolution:          Frequency/granularity of measurement interval
        :param periods:             Recent or historical files (optional)
        :param start_date:          start of the requested time span
        :param end_date:            end of the requested time span
        :param tidy_data:           Reshape DataFrame to a more tidy
                                    and row-based version of data
        :param humanize_parameters: Replace column names by more meaningful ones
        :raises ValueError:         if a station identifier contains non-digits
        """
        padded_ids = pd.Series(station_ids).astype(str)
        padded_ids = padded_ids.str.pad(5, "left", "0")

        if not padded_ids.str.isdigit().all():
            raise ValueError("station identifiers of DWD only contain digits")

        # The resolution has to exist before the parent initializer runs, as
        # the parameter parsing for dwd depends on it.
        self.resolution = parse_enumeration_from_template(
            resolution, DWDObservationResolution
        )

        super(DWDObservationData, self).__init__(
            station_ids=padded_ids,
            parameters=parameters,
            start_date=start_date,
            end_date=end_date,
            humanize_parameters=humanize_parameters,
        )

        if periods:
            # Explicit periods: parse each one and keep them sorted.
            parsed_periods = pd.Series(periods).apply(
                parse_enumeration_from_template, args=(DWDObservationPeriod,)
            )
            periods = parsed_periods.sort_values().tolist()

            if start_date or end_date:
                log.warning(
                    f"start_date and end_date filtering limited to defined periods {periods}"
                )
        elif self.start_date:
            # No periods but a date: derive the periods from the dates.
            periods = self._get_periods()
        else:
            # Neither periods nor dates: use every period.
            periods = list(DWDObservationPeriod)

        # Dates lying in the future are not expected to match any period.
        if not periods:
            log.warning("start date and end date are out of range of any period.")

        self.periods = periods

        # Tidy data is enforced for more than one parameter and for any
        # parameter that is not a whole parameter set.
        tidy = tidy_data or len(self.parameters) > 1
        if not tidy:
            single_parameter_flags = [
                not isinstance(requested, DWDObservationParameterSet)
                for requested, _ in self.parameters
            ]
            tidy = any(single_parameter_flags)
        self.tidy_data = tidy
Ejemplo n.º 28
0
    def _parse_parameter(self, parameter: List[Union[str, Enum]]) -> List[Enum]:
        """
        Method to parse parameters, either from string or enum. Case independent for
        strings (per the original note; the matching itself happens in
        parse_enumeration_from_template, which is not visible here).

        Each given item is tried against the dataset enumeration first (if the
        class has one) and against the parameter enumeration afterwards. Items
        that cannot be parsed are logged and skipped.

        :param parameter: parameters as strings or enumerations
        :return: list of parsed parameters; if the class has a dataset base the
            entries are (parameter, dataset) tuples, otherwise plain enumerations
            of type self._parameter_base
        """
        # TODO: refactor this!
        # Enumerations that were tried, only used for the log message below
        enums = []
        if self._dataset_base:
            enums.append(self._dataset_base)

        enums.append(self._parameter_base)

        parameters = []

        # NOTE: the loop variable shadows the argument of the same name
        for parameter in pd.Series(parameter):
            parameter_ = None

            if self._dataset_base:
                # First attempt: the item names a whole dataset, which is then
                # stored as both parameter and dataset
                try:
                    parameter_ = parse_enumeration_from_template(
                        parameter, self._dataset_base
                    )
                except InvalidEnumeration:
                    pass
                else:
                    parameters.append((parameter_, parameter_))
                    continue

                # Second attempt: the item names a single parameter, for which
                # the dataset it belongs to is looked up
                try:
                    parameter_ = parse_enumeration_from_template(
                        parameter, self._parameter_base[self._dataset_accessor]
                    )
                    if self._unique_dataset:
                        dataset = self._dataset_base[self._dataset_accessor]
                    else:
                        dataset = self._parameter_to_dataset_mapping[self.resolution][
                            parameter_
                        ]
                        # Replace the parameter by its entry in the dataset tree
                        parameter_ = self._dataset_tree[self._dataset_accessor][
                            dataset.name
                        ][parameter_.name]
                except InvalidEnumeration:
                    pass
                else:
                    parameters.append((parameter_, dataset))
                    continue

            # Reached without a dataset base, or after both attempts above
            # failed; a successfully parsed item is stored without a dataset
            try:
                parameter_ = parse_enumeration_from_template(
                    parameter, self._parameter_base[self._dataset_accessor]
                )
                parameters.append(parameter_)
            except InvalidEnumeration:
                pass

            if not parameter_:
                log.info(f"parameter {parameter} could not be parsed from ({enums})")

        return parameters
Ejemplo n.º 29
0
    def _build_complete_df(
        self, df: pd.DataFrame, station_id: str, parameter: Enum
    ) -> pd.DataFrame:
        # For cases where requests are not defined by start and end date but rather by
        # periods, use the returned df without modifications
        # We may put a standard date range here if no data is found
        if not self.stations.start_date:
            return df

        dataset = None
        if self.stations.stations._has_datasets:
            parameter, dataset = parameter

        if parameter != dataset or not self.stations.stations.tidy:
            df = pd.merge(
                left=self._base_df,
                right=df,
                left_on=Columns.DATE.value,
                right_on=Columns.DATE.value,
                how="left",
            )

            df[Columns.STATION_ID.value] = station_id

            if self.stations.tidy:
                df[Columns.PARAMETER.value] = parameter.value
                df[Columns.PARAMETER.value] = pd.Categorical(
                    df[Columns.PARAMETER.value]
                )

                if dataset:
                    df[Columns.DATASET.value] = dataset.name.lower()
                    df[Columns.DATASET.value] = pd.Categorical(
                        df[Columns.DATASET.value]
                    )

            return df
        else:
            data = []
            for parameter, group in df.groupby(Columns.PARAMETER.value, sort=False):
                if self.stations.stations._unique_dataset:
                    parameter_ = parse_enumeration_from_template(
                        parameter,
                        self.stations.stations._parameter_base[
                            self.stations.resolution.name
                        ],
                    )
                else:
                    parameter_ = parse_enumeration_from_template(
                        parameter,
                        self.stations.stations._dataset_tree[
                            self.stations.resolution.name
                        ][dataset.name],
                    )

                df = pd.merge(
                    left=self._base_df,
                    right=group,
                    left_on=Columns.DATE.value,
                    right_on=Columns.DATE.value,
                    how="left",
                )

                df[Columns.STATION_ID.value] = station_id

                df[Columns.PARAMETER.value] = parameter_.value

                df[Columns.DATASET.value] = dataset.name.lower()
                df[Columns.DATASET.value] = pd.Categorical(df[Columns.DATASET.value])

                data.append(df)

            return pd.concat(data)