Ejemplo n.º 1
0
    def __init__(
        self,
        start_date: Union[None, str, Timestamp] = None,
        end_date: Union[None, str, Timestamp] = None,
    ) -> None:
        """
        Normalise the optional date bounds and store them on the instance.

        Truthy values that are not ``datetime`` instances are handed to
        ``parse_datetime``. Every resulting date then gets its ``tzinfo``
        replaced by ``self.tz``; falsy values are stored as ``None``.

        :param start_date: start date for filtering stations for their available data
        :param end_date: end date for filtering stations for their available data
        :raises StartDateEndDateError: if both dates are given and ``start_date``
            is later than ``end_date``
        """
        # TODO: make datetimes timezone sensible
        if start_date and not isinstance(start_date, datetime):
            start_date = parse_datetime(start_date)
        if end_date and not isinstance(end_date, datetime):
            end_date = parse_datetime(end_date)

        # Attach the instance timezone; anything falsy collapses to None.
        if start_date:
            start_date = start_date.replace(tzinfo=self.tz)
        else:
            start_date = None

        if end_date:
            end_date = end_date.replace(tzinfo=self.tz)
        else:
            end_date = None

        if start_date and end_date and start_date > end_date:
            raise StartDateEndDateError(
                "'start_date' has to be before 'end_date'")

        self.start_date = start_date
        self.end_date = end_date
Ejemplo n.º 2
0
    def __init__(
        self,
        station_ids: Tuple[str],
        parameters: Tuple[Union[str, Enum]],
        start_date: Optional[Union[str, Timestamp, datetime]] = None,
        end_date: Optional[Union[str, Timestamp, datetime]] = None,
        humanize_parameters: bool = False,
    ) -> None:
        """
        Store the request definition: stations, parameters and date range.

        :param station_ids: station ids for which data is requested; they are
            cast to a list of strings
        :param parameters: parameters either as strings or enumerations for which
            data is requested, parsed via ``self._parse_parameters``
        :param start_date: start date of the resulting data; if only
            ``end_date`` is given, ``start_date`` is set equal to it
        :param end_date: end date of the resulting data; if only
            ``start_date`` is given, ``end_date`` is set equal to it
        :param humanize_parameters: whether parameters should be renamed to
            meaningful names
        :raises NoParametersFound: if parsing ``parameters`` yields nothing
        :raises StartDateEndDateError: if the parsed ``start_date`` is not
            smaller than or equal to the parsed ``end_date``
        """
        # Normalise ids to a plain list of strings.
        self.station_ids = pd.Series(station_ids).astype(str).tolist()
        self.parameters = self._parse_parameters(parameters)

        # TODO: replace this with a response + logging
        # TODO: move this to self.collect_data
        if not self.parameters:
            raise NoParametersFound(
                f"No parameters could be parsed from {parameters}")

        if start_date or end_date:
            # A single given date stands in for the missing one.
            start_date = start_date or end_date
            end_date = end_date or start_date

            # TODO: use dynamic parsing that accepts entered timestamps with given
            #  timezone
            start_date, end_date = (
                Timestamp(dateparser.parse(str(bound)), tz=pytz.UTC)
                for bound in (start_date, end_date)
            )

            # TODO: replace this with a response + logging
            if not start_date <= end_date:
                raise StartDateEndDateError(
                    "Error: 'start_date' must be smaller or equal to 'end_date'."
                )

        self.start_date = start_date
        self.end_date = end_date
        self.humanize_parameters = humanize_parameters
Ejemplo n.º 3
0
    def convert_timestamps(
        start_date: Optional[Union[str, datetime, pd.Timestamp]] = None,
        end_date: Optional[Union[str, datetime, pd.Timestamp]] = None,
    ) -> Union[Tuple[None, None], Tuple[pd.Timestamp, pd.Timestamp]]:
        """
        Normalise a start/end date pair to ``pd.Timestamp`` objects.

        Strings are parsed with ``dateutil.parser.isoparse``. Values without
        ``tzinfo`` get ``pytz.UTC`` attached. If only one of the two dates is
        given, the other one is set equal to it.

        :param start_date: Start date for filtering stations for their available data
        :param end_date:   End date for filtering stations for their available data
        :return:           ``(None, None)`` if both inputs are ``None``, otherwise
                           a tuple ``(start_date, end_date)`` of pd.Timestamp objects
        :raises StartDateEndDateError: if ``start_date <= end_date`` does not hold
        """
        if start_date is None and end_date is None:
            return None, None

        # Apply the same normalisation to both bounds, start first.
        normalised = []
        for bound in (start_date, end_date):
            if bound:
                if isinstance(bound, str):
                    bound = dateutil.parser.isoparse(bound)
                if not bound.tzinfo:
                    bound = bound.replace(tzinfo=pytz.UTC)
            normalised.append(bound)
        lower, upper = normalised

        # If only one date given, set the other one to equal.
        lower = lower or upper
        upper = upper or lower

        # TODO: replace this with a response + logging
        if not lower <= upper:
            raise StartDateEndDateError(
                "Error: 'start_date' must be smaller or equal to 'end_date'."
            )

        return pd.Timestamp(lower), pd.Timestamp(upper)
Ejemplo n.º 4
0
    def __init__(
        self,
        start_date: Union[None, str, Timestamp] = None,
        end_date: Union[None, str, Timestamp] = None,
    ) -> None:
        """
        Store an optional date range on the instance.

        Truthy values that are not ``datetime`` instances are handed to
        ``parse_datetime``; all other values are stored unchanged.

        :param start_date: lower bound of the date range (optional)
        :param end_date: upper bound of the date range (optional)
        :raises StartDateEndDateError: if both dates are given and ``start_date``
            is later than ``end_date``
        """
        if start_date and not isinstance(start_date, datetime):
            start_date = parse_datetime(start_date)

        if end_date and not isinstance(end_date, datetime):
            end_date = parse_datetime(end_date)

        # The order check only makes sense when both bounds are present.
        if start_date and end_date and start_date > end_date:
            raise StartDateEndDateError("'start_date' has to be before 'end_date'")

        self.start_date = start_date
        self.end_date = end_date
Ejemplo n.º 5
0
    def __init__(
        self,
        station_ids: Union[str, int, List[Union[int, str]]],
        parameter: Union[str, Parameter, List[Union[str, Parameter]]],
        time_resolution: Union[str, TimeResolution],
        period_type: Union[Union[None, str, PeriodType],
                           List[Union[str, PeriodType]]] = None,
        start_date: Union[None, str, Timestamp] = None,
        end_date: Union[None, str, Timestamp] = None,
        prefer_local: bool = False,
        write_file: bool = False,
        folder: Union[str, Path] = DWD_FOLDER_MAIN,
        tidy_data: bool = True,
        humanize_column_names: bool = False,
    ) -> None:
        """
        Class with mostly flexible arguments to define a request regarding DWD data.
        Special handling for period type. If start_date/end_date are given all period
        types are considered and merged together and the data is filtered for the given
        dates afterwards.

        :param station_ids: definition of stations by str, int or list of str/int,
                            will be parsed to list of int
        :param parameter:           Observation measure
        :param time_resolution:     Frequency/granularity of measurement interval
        :param period_type:         Recent or historical files (optional), if None
                                    and start_date and end_date None, all period
                                    types are used
        :param start_date:          Replacement for period type to define exact time
                                    of requested data, if used, period type will be set
                                    to all period types (hist, recent, now)
        :param end_date:            Replacement for period type to define exact time
                                    of requested data, if used, period type will be set
                                    to all period types (hist, recent, now)
        :param prefer_local:        Definition if data should rather be taken from a
                                    local source
        :param write_file:          Should data be written to a local file
        :param folder:              Place where file lists (and station data) are stored
        :param tidy_data:           Reshape DataFrame to a more tidy
                                    and row-based version of data; forced to True
                                    when more than one parameter is requested
        :param humanize_column_names: Replace column names by more meaningful ones
        :raises ValueError: if the station ids can not be cast to integers
        :raises StartDateEndDateError: if the parsed ``start_date`` is not smaller
                                    than or equal to the parsed ``end_date``
        """

        try:
            self.station_ids = pd.Series(station_ids).astype(int).tolist()
        except ValueError as err:
            # Keep the original failure attached as the cause for debugging.
            raise ValueError(
                "List of station id's can not be parsed to integers.") from err

        self.parameter = (pd.Series(parameter).apply(
            parse_enumeration_from_template, args=(Parameter, )).tolist())

        self.time_resolution = parse_enumeration_from_template(
            time_resolution, TimeResolution)

        # If any date is given, use all period types and filter, else if no period
        # type is given use all period types
        if start_date or end_date or not period_type:
            self.period_type = [*PeriodType]
        # Otherwise period types will be parsed
        else:
            # For the case that a period_type is given, parse the period type(s)
            self.period_type = (pd.Series(period_type).apply(
                parse_enumeration_from_template,
                args=(PeriodType, )).sort_values().tolist())

        if start_date or end_date:
            # If only one date given, make the other one equal
            if not start_date:
                start_date = end_date

            if not end_date:
                end_date = start_date

            # dateparser.parse only accepts strings, while the signature also
            # allows Timestamp objects, so stringify before parsing.
            self.start_date = Timestamp(dateparser.parse(str(start_date)))
            self.end_date = Timestamp(dateparser.parse(str(end_date)))

            if not self.start_date <= self.end_date:
                raise StartDateEndDateError(
                    "Error: 'start_date' must be smaller or equal to 'end_date'."
                )
        else:
            self.start_date = start_date
            self.end_date = end_date

        self.prefer_local = prefer_local
        self.write_file = write_file
        self.folder = folder
        # If more than one parameter requested, automatically tidy data
        self.tidy_data = len(self.parameter) > 1 or tidy_data
        self.humanize_column_names = humanize_column_names
Ejemplo n.º 6
0
    def __init__(
        self,
        station_ids: List[Union[int, str]],
        parameters: List[
            Union[str, DWDObservationParameter, DWDObservationParameterSet]
        ],
        resolution: Union[str, DWDObservationResolution],
        periods: Optional[List[Union[str, DWDObservationPeriod]]] = None,
        start_date: Union[None, str, Timestamp, datetime] = None,
        end_date: Union[None, str, Timestamp, datetime] = None,
        storage: StorageAdapter = None,
        tidy_data: bool = True,
        humanize_column_names: bool = False,
    ) -> None:
        """
        Define a request for DWD observation data.

        Periods get special treatment: as soon as ``start_date`` or ``end_date``
        is given, every period is used and the data is filtered by the dates
        afterwards.

        :param station_ids:     stations as list of str/int, parsed to list of int
        :param parameters:      observation measures; each one is resolved to a
                                ``(parameter, parameter_set)`` pair, entries that
                                raise ``InvalidParameter`` are logged and skipped
        :param resolution:      frequency/granularity of measurement interval
        :param periods:         recent or historical files (optional); all
                                periods are used if this is empty or any date
                                is given
        :param start_date:      start of the requested time span; if only
                                ``end_date`` is given, it is set equal to it
        :param end_date:        end of the requested time span; if only
                                ``start_date`` is given, it is set equal to it
        :param storage:         storage adapter
        :param tidy_data:       reshape DataFrame to a more tidy and row-based
                                version of data; forced on when more than one
                                parameter was parsed or any parsed parameter is
                                not a ``DWDObservationParameterSet``
        :param humanize_column_names: replace column names by more meaningful ones
        :raises ValueError: if the station ids can not be cast to integers
        :raises NoParametersFound: if no parameter could be resolved
        :raises StartDateEndDateError: if the parsed ``start_date`` is not smaller
                                than or equal to the parsed ``end_date``
        """

        try:
            self.station_ids = pd.Series(station_ids).astype(int).tolist()
        except ValueError:
            raise ValueError("List of station id's can not be parsed to integers.")

        # The resolution has to be known before parameters can be resolved.
        self.resolution = parse_enumeration_from_template(
            resolution, DWDObservationResolution
        )

        self.parameters = []

        for candidate in pd.Series(parameters):
            try:
                parameter, parameter_set = create_parameter_to_parameter_set_combination(
                    candidate, self.resolution
                )
                self.parameters.append((parameter, parameter_set))
            except InvalidParameter as e:
                log.info(str(e))

        if not self.parameters:
            raise NoParametersFound(f"No parameters could be parsed from {parameters}")

        # Any given date, or no explicit periods at all, means: use every period.
        if start_date or end_date or not periods:
            self.periods = list(DWDObservationPeriod)
        else:
            # Explicit periods are parsed to enumerations and sorted.
            parsed_periods = pd.Series(periods).apply(
                parse_enumeration_from_template, args=(DWDObservationPeriod,)
            )
            self.periods = parsed_periods.sort_values().tolist()

        if not (start_date or end_date):
            self.start_date = start_date
            self.end_date = end_date
        else:
            # A single given date stands in for the missing one.
            start_date = start_date or end_date
            end_date = end_date or start_date

            self.start_date = Timestamp(dateparser.parse(str(start_date)))
            self.end_date = Timestamp(dateparser.parse(str(end_date)))

            if not self.start_date <= self.end_date:
                raise StartDateEndDateError(
                    "Error: 'start_date' must be smaller or equal to 'end_date'."
                )

        self.storage = storage

        # Several parameters, or a parameter that is not a whole parameter set,
        # automatically switch tidy data on.
        self.tidy_data = (
            len(self.parameters) > 1
            or any(
                not isinstance(parameter, DWDObservationParameterSet)
                for parameter, _ in self.parameters
            )
            or tidy_data
        )
        self.humanize_column_names = humanize_column_names
Ejemplo n.º 7
0
    def __init__(
        self,
        mosmix_type: DWDMosmixType,
        station_ids: List[str],
        parameters: Optional[List[Union[str, DWDForecastParameter]]] = None,
        start_date: Optional[Union[str, datetime,
                                   DWDForecastDate]] = DWDForecastDate.LATEST,
        end_date: Optional[Union[str, datetime, timedelta]] = None,
        tidy_data: bool = True,
        humanize_column_names: bool = False,
    ) -> None:
        """
        Define a request for MOSMIX forecast data and set up the KML reader.

        Args:
            mosmix_type: type of forecast, either small (MOSMIX-S) or large
                (MOSMIX-L), as enumeration
            station_ids: station ids which are being queried from the MOSMIX
                forecast, cast to a list of str
            parameters: optional parameters for which the forecasts are filtered
            start_date: start date of the MOSMIX forecast, can be used in combination
                with end date to query multiple MOSMIX forecasts, or instead used with
                enumeration to only query LATEST MOSMIX forecast
            end_date: end date of MOSMIX forecast, can be used to query multiple MOSMIX
                forecasts available on the server
            tidy_data: boolean if pandas.DataFrame shall be tidied and values put in
                rows
            humanize_column_names: boolean if parameters shall be renamed to human
                readable names

        Raises:
            ValueError: if ``mosmix_type`` is not a member of ``DWDMosmixType``
            StartDateEndDateError: if the parsed ``start_date`` is later than the
                parsed ``end_date``
        """

        if mosmix_type not in DWDMosmixType:
            # The message names the parameter and members that are actually checked.
            raise ValueError(
                "mosmix_type should be one of DWDMosmixType.SMALL or "
                "DWDMosmixType.LARGE")
        if station_ids:
            station_ids = pd.Series(station_ids).astype(str).tolist()
        if parameters:
            parameters = (pd.Series(parameters).apply(
                parse_enumeration_from_template,
                args=(DWDForecastParameter, ),
            ).tolist())

        # Fill in whichever date is missing; no dates at all means LATEST.
        if not start_date and not end_date:
            start_date = DWDForecastDate.LATEST
        elif not end_date:
            end_date = start_date
        elif not start_date:
            start_date = end_date

        # NOTE(review): when start_date is LATEST the block below is skipped, so a
        # given end_date is stored as passed, without parsing - confirm intended.
        if start_date is not DWDForecastDate.LATEST:
            # Forecast dates are handled at hourly granularity.
            start_date = pd.to_datetime(start_date,
                                        infer_datetime_format=True).floor("1H")
            end_date = pd.to_datetime(end_date,
                                      infer_datetime_format=True).floor("1H")

            if not start_date <= end_date:
                raise StartDateEndDateError(
                    "end_date should be same or later then start_date")

            # Shift dates to 3, 9, 15, 21 hour format
            if mosmix_type == DWDMosmixType.LARGE:
                start_date = self.adjust_datetime(start_date)
                end_date = self.adjust_datetime(end_date)

        self.forecast_type = mosmix_type
        self.station_ids = station_ids
        self.parameters = parameters
        self.start_date = start_date
        self.end_date = end_date
        self.tidy_data = tidy_data
        self.humanize_column_names = humanize_column_names

        if mosmix_type == DWDMosmixType.SMALL:
            self.freq = "1H"  # short forecasts released every hour
        else:
            self.freq = "6H"

        self.kml = KMLReader(station_ids=self.station_ids,
                             parameters=self.parameters)
Ejemplo n.º 8
0
    def __init__(
        self,
        station_ids: Union[str, int, List[Union[int, str]]],
        parameter: Union[str, Parameter, List[Union[str, Parameter]]],
        time_resolution: Union[str, TimeResolution],
        period_type: Union[
            Union[None, str, PeriodType], List[Union[None, str, PeriodType]]
        ] = None,
        start_date: Union[None, str, Timestamp] = None,
        end_date: Union[None, str, Timestamp] = None,
        prefer_local: bool = False,
        write_file: bool = False,
        folder: Union[str, Path] = DWD_FOLDER_MAIN,
        tidy_data: bool = True,
        humanize_column_names: bool = False,
        create_new_file_index: bool = False,
    ) -> None:
        """
        Class with mostly flexible arguments to define a request regarding DWD data.
        Special handling for period type. If no period type is given but
        start_date/end_date are, all period types are considered and the dates
        are parsed and stored for filtering. If a period type is given, the dates
        are not parsed and start_date/end_date are stored as None.

        Args:
            station_ids: definition of stations by str, int or list of str/int,
                will be parsed to list of int
            parameter: str or parameter enumeration defining the requested parameter
            time_resolution: str or time resolution enumeration defining the requested
                time resolution
            period_type: str or period type enumeration defining the requested
                period type
            start_date: replacement for period type to define exact time of
                requested data
            end_date: replacement for period type to define exact time of requested data
            prefer_local: definition if data should rather be taken from a local source
            write_file: should data be written to a local file
            folder: place where file lists (and station data) are stored
            tidy_data: reshape DataFrame to a more tidy, row based version of data;
                forced to True when more than one parameter is requested
            humanize_column_names: replace column names by more meaningful ones
            create_new_file_index: definition if the file index should be recreated

        Raises:
            ValueError: if none of period_type, start_date and end_date is given,
                or if the station ids can not be cast to integers
            StartDateEndDateError: if the parsed start_date is not smaller than or
                equal to the parsed end_date
        """

        if not (period_type or start_date or end_date):
            # The condition is about period_type, so the message names it.
            raise ValueError(
                "Define either a 'period_type' or one of or both 'start_date' and "
                "'end_date' and leave 'period_type' empty!"
            )

        try:
            self.station_ids = [
                int(station_id) for station_id in cast_to_list(station_ids)
            ]
        except ValueError as err:
            # Keep the original failure attached as the cause for debugging.
            raise ValueError(
                "List of station id's can not be parsed to integers."
            ) from err

        self.parameter = [
            parse_enumeration_from_template(p, Parameter)
            for p in cast_to_list(parameter)
        ]

        self.time_resolution = parse_enumeration_from_template(
            time_resolution, TimeResolution
        )

        # start date and end date required for collect_data in any case
        self.start_date = None
        self.end_date = None

        if period_type:
            # For the case that a period_type is given, parse the period type(s);
            # None entries are passed through unparsed.
            parsed_period_types = [
                None if pt is None else parse_enumeration_from_template(pt, PeriodType)
                for pt in cast_to_list(period_type)
            ]

            # Additional sorting required for self.period_type to ensure that for
            # multiple periods the data is first sourced from historical
            self.period_type = sorted(parsed_period_types)

        else:
            # working with ranges of data means expecting data to be laying between
            # periods, thus including all periods
            self.period_type = [
                PeriodType.HISTORICAL,
                PeriodType.RECENT,
                PeriodType.NOW,
            ]

            # If only one date given, make the other one equal
            if not start_date:
                start_date = end_date

            if not end_date:
                end_date = start_date

            # dateparser.parse only accepts strings, while the signature also
            # allows Timestamp objects, so stringify before parsing.
            self.start_date = Timestamp(dateparser.parse(str(start_date)))
            self.end_date = Timestamp(dateparser.parse(str(end_date)))

            if not self.start_date <= self.end_date:
                raise StartDateEndDateError(
                    "Error: 'start_date' must be smaller or equal to 'end_date'."
                )

        self.prefer_local = prefer_local
        self.write_file = write_file
        self.folder = folder
        # If more than one parameter requested, automatically tidy data
        self.tidy_data = len(self.parameter) > 1 or tidy_data
        self.humanize_column_names = humanize_column_names
        self.create_new_file_index = create_new_file_index