コード例 #1
0
def test_build_parameter_identifier():
    """
    Verify the identifier string built for the daily, historical climate
    summary parameter set of station id 1.
    """
    expected_identifier = "kl/daily/historical/station_id_1"

    actual_identifier = build_parameter_set_identifier(
        DWDObservationParameterSet.CLIMATE_SUMMARY,
        DWDObservationResolution.DAILY,
        DWDObservationPeriod.HISTORICAL,
        1,
    )

    assert actual_identifier == expected_identifier
コード例 #2
0
ファイル: store.py プロジェクト: inowas/wetterdienst
    def hdf5_key(self, station_id: int) -> str:
        """
        Compose the HDF5 key for the data of a single station.

        The key is built from this object's parameter set, resolution and period
        together with the given station id.

        :param station_id:  Station id of data

        :return:            Key for storing data into HDF5 file.
        """
        # Collect the instance-level parts first; the station id comes last.
        identifier_parts = (self.parameter_set, self.resolution, self.period)

        return build_parameter_set_identifier(*identifier_parts, station_id)
コード例 #3
0
ファイル: access.py プロジェクト: inowas/wetterdienst
def collect_climate_observations_data(
    station_id: int,
    parameter_set: DWDObservationParameterSet,
    resolution: DWDObservationResolution,
    period: DWDObservationPeriod,
) -> pd.DataFrame:
    """
    Run the collection pipeline for a single station: validate the requested
    combination, list the remote files, download and parse them and finally
    coerce the field types of the parsed data.

    :param station_id:      station id that is being loaded
    :param parameter_set:   Parameter set as enumeration
    :param resolution:      Time resolution as enumeration
    :param period:          Period type as enumeration

    :return:                Parsed and type-coerced data of the station, or an
                            empty DataFrame if no remote files were found.

    :raises InvalidParameterCombination: if the combination of parameter set,
                            resolution and period does not pass the check.
    """
    combination_is_valid = check_dwd_observations_parameter_set(
        parameter_set, resolution, period
    )
    if not combination_is_valid:
        message = (
            f"Invalid combination: {parameter_set.value} / {resolution.value} / "
            f"{period.value}"
        )
        raise InvalidParameterCombination(message)

    remote_files = create_file_list_for_climate_observations(
        station_id, parameter_set, resolution, period
    )

    # Nothing to download: report it and hand back an empty frame.
    if len(remote_files) == 0:
        parameter_identifier = build_parameter_set_identifier(
            parameter_set, resolution, period, station_id
        )
        log.info(
            f"No files found for {parameter_identifier}. Station will be skipped."
        )
        return pd.DataFrame()

    downloaded = download_climate_observations_data_parallel(remote_files)

    parsed_df = parse_climate_observations_data(downloaded, parameter_set, resolution)

    return coerce_field_types(parsed_df, resolution)
コード例 #4
0
ファイル: api.py プロジェクト: inowas/wetterdienst
    def _collect_data(
        self, station_id: int, parameter_set: DWDObservationParameterSet
    ) -> pd.DataFrame:
        """
        Method to collect data for one specified parameter set. Manages restoring,
        collection and storing of data, transformation and combination of different
        periods.

        For every period in ``self.periods`` the data is first looked up in the
        configured storage (if any); only when nothing is restored is it
        collected via ``collect_climate_observations_data``. Rows whose date is
        already present in the accumulated result are dropped before a newly
        collected period is added.

        Args:
            station_id: station id for which parameter is collected
            parameter_set: chosen parameter set that is collected

        Returns:
            pandas.DataFrame for given parameter set of station, optionally
            tidied (``self.tidy_data``) and with humanized names
            (``self.humanize_column_names``)
        """
        df_parameter = pd.DataFrame()

        for period_type in self.periods:
            parameter_identifier = build_parameter_set_identifier(
                parameter_set, self.resolution, period_type, station_id
            )

            storage = None
            if self.storage:
                storage = self.storage.hdf5(
                    parameter=parameter_set,
                    resolution=self.resolution,
                    period=period_type,
                )

                df_period = storage.restore(station_id)

                if not df_period.empty:
                    # pd.concat replaces DataFrame.append, which was removed in
                    # pandas 2.0.
                    df_parameter = pd.concat([df_parameter, df_period])
                    continue

            log.info(f"Acquiring observations data for {parameter_identifier}.")

            try:
                df_period = collect_climate_observations_data(
                    station_id, parameter_set, self.resolution, period_type
                )
            except InvalidParameterCombination:
                log.info(
                    f"Invalid combination {parameter_set.value}/"
                    f"{self.resolution.value}/{period_type} is skipped."
                )

                # Continue with an empty frame so the remaining steps still run.
                df_period = pd.DataFrame()

            if self.storage and self.storage.persist:
                storage.store(station_id=station_id, df=df_period)

            # Filter out values which already are in the DataFrame
            try:
                df_period = df_period[
                    ~df_period[DWDMetaColumns.DATE.value].isin(
                        df_parameter[DWDMetaColumns.DATE.value]
                    )
                ]
            except KeyError:
                # One of the frames has no date column (e.g. it is still
                # empty), so there is nothing to de-duplicate against.
                pass

            df_parameter = pd.concat([df_parameter, df_period])

        if self.tidy_data:
            df_parameter = df_parameter.dwd.tidy_up_data()

            df_parameter.insert(2, DWDMetaColumns.PARAMETER.value, parameter_set.name)

        # Assign meaningful column names (humanized).
        if self.humanize_column_names:
            hcnm = self._create_humanized_column_names_mapping(
                self.resolution, parameter_set
            )

            if self.tidy_data:
                # Tidy data carries the names as values of the element column.
                df_parameter[DWDMetaColumns.ELEMENT.value] = df_parameter[
                    DWDMetaColumns.ELEMENT.value
                ].apply(lambda x: hcnm[x])
            else:
                df_parameter = df_parameter.rename(columns=hcnm)

        return df_parameter
コード例 #5
0
    def _collect_station_parameter(
        self,
        station_id: str,
        parameter: Tuple[Union[DWDObservationParameter,
                               DWDObservationParameterSet],
                         DWDObservationParameterSet, ],
    ) -> pd.DataFrame:
        """
        Method to collect data for one specified parameter. Manages
        collection of data, transformation and combination of different
        periods.

        For a resolution in ``HIGH_RESOLUTIONS`` combined with the historical
        period, one collection is run per date range returned by
        ``self._get_historical_date_ranges``; every other period is collected
        once without a date range. Rows whose date is already present in the
        accumulated result are dropped before a collected period is added.

        Args:
            station_id: station id for which parameter is collected
            parameter: chosen parameter-parameter_set combination that is collected

        Returns:
            pandas.DataFrame for given parameter of station
        """
        parameter, parameter_set = parameter

        periods_and_date_ranges = []
        for period in self.periods:
            if (
                self.resolution in HIGH_RESOLUTIONS
                and period == DWDObservationPeriod.HISTORICAL
            ):
                date_ranges = self._get_historical_date_ranges(
                    station_id, parameter_set
                )

                periods_and_date_ranges.extend(
                    (period, date_range) for date_range in date_ranges
                )
            else:
                periods_and_date_ranges.append((period, None))

        parameter_df = pd.DataFrame()

        for period, date_range in periods_and_date_ranges:
            parameter_identifier = build_parameter_set_identifier(
                parameter_set, self.resolution, period, station_id, date_range
            )

            log.info(f"Acquiring observations data for {parameter_identifier}.")

            # TODO: integrate collect_climate_observations_data in class
            try:
                period_df = collect_climate_observations_data(
                    station_id, parameter_set, self.resolution, period, date_range
                )
            except InvalidParameterCombination:
                log.info(
                    f"Invalid combination {parameter_set.value}/"
                    f"{self.resolution.value}/{period} is skipped."
                )

                # Continue with an empty frame so the remaining steps still run.
                period_df = pd.DataFrame()

            # Filter out values which already are in the DataFrame
            try:
                period_df = period_df[
                    ~period_df[DWDMetaColumns.DATE.value].isin(
                        parameter_df[DWDMetaColumns.DATE.value]
                    )
                ]
            except KeyError:
                # One of the frames has no date column (e.g. it is still
                # empty), so there is nothing to de-duplicate against.
                pass

            # pd.concat replaces DataFrame.append, which was removed in
            # pandas 2.0.
            parameter_df = pd.concat([parameter_df, period_df])

        if self.tidy_data:
            parameter_df = parameter_df.dwd.tidy_up_data()

            # TODO: remove this column and rather move it into metadata of resulting
            #  data model
            parameter_df.insert(
                2, DWDMetaColumns.PARAMETER_SET.value, parameter_set.name
            )
            parameter_df[DWDMetaColumns.PARAMETER_SET.value] = parameter_df[
                DWDMetaColumns.PARAMETER_SET.value
            ].astype("category")

        # A single parameter (not a whole parameter set) was requested: keep
        # only the rows belonging to it.
        if parameter not in DWDObservationParameterSet:
            parameter_df = parameter_df[
                parameter_df[DWDMetaColumns.PARAMETER.value] == parameter.value
            ]

        return parameter_df