def __init__(
    self,
    parameter_set: Union[str, DWDObservationParameterSet],
    resolution: Union[str, DWDObservationResolution],
    period: Union[str, DWDObservationPeriod] = None,
    start_date: Union[None, str, Timestamp] = None,
    end_date: Union[None, str, Timestamp] = None,
):
    """
    :param parameter_set: parameter set str/enumeration
    :param resolution: resolution str/enumeration
    :param period: period str/enumeration
    :param start_date: start date to limit the stations
    :param end_date: end date to limit the stations
    """
    super().__init__(start_date=start_date, end_date=end_date)

    # Normalize raw string arguments into their enum counterparts.
    parsed_set = parse_enumeration_from_template(
        parameter_set, DWDObservationParameterSet
    )
    parsed_resolution = parse_enumeration_from_template(
        resolution, DWDObservationResolution
    )
    parsed_period = parse_enumeration_from_template(period, DWDObservationPeriod)

    # TODO: move to _all and replace error with logging + empty dataframe
    if not check_dwd_observations_parameter_set(
        parsed_set, parsed_resolution, parsed_period
    ):
        raise InvalidParameterCombination(
            f"The combination of {parsed_set.value}, {parsed_resolution.value}, "
            f"{parsed_period.value} is invalid."
        )

    self.parameter = parsed_set
    self.resolution = parsed_resolution
    self.period = parsed_period
def create_parameter_to_dataset_combination(
    parameter: Union[DwdObservationParameter, DwdObservationDataset],
    resolution: Resolution,
) -> Tuple[
    Union[DwdObservationParameter, DwdObservationDataset],
    DwdObservationDataset,
]:
    """Map a requested parameter to the dataset that provides it; the dataset
    has to be downloaded first so the single parameter can be extracted from it."""
    try:
        # First interpretation: a single parameter within the given resolution.
        resolved = parse_enumeration_from_template(
            parameter, DwdObservationParameter[resolution.name]
        )
        mapped = PARAMETER_TO_DATASET_MAPPING[resolution][resolved]
        dataset = parse_enumeration_from_template(
            mapped.__class__.__name__, DwdObservationDataset
        )
        return mapped, dataset
    except (KeyError, InvalidEnumeration):
        # Second interpretation: the argument names a whole dataset.
        try:
            dataset = parse_enumeration_from_template(parameter, DwdObservationDataset)
            return dataset, dataset
        except InvalidEnumeration:
            raise InvalidParameter(
                f"parameter {parameter} could not be parsed for "
                f"time resolution {resolution}"
            )
def dwd_stations(
    parameter: str = Query(default=None),
    resolution: str = Query(default=None),
    period: str = Query(default=None),
    sql: str = Query(default=None),
):
    """Return the list of DWD observation sites for a parameter set as JSON."""
    parameter_set = parse_enumeration_from_template(parameter, DWDObservationParameterSet)
    resolution_ = parse_enumeration_from_template(resolution, DWDObservationResolution)
    period_ = parse_enumeration_from_template(period, DWDObservationPeriod)

    # Data acquisition.
    df = DWDObservationSites(
        parameter_set=parameter_set,
        resolution=resolution_,
        period=period_,
    ).all()

    # Postprocessing.
    df = df.dwd.lower()
    if sql is not None:
        df = df.io.sql(sql)

    return make_json_response(df.io.to_dict())
def __init__(
    self,
    parameter_set: Union[str, DWDObservationParameterSet],
    resolution: Union[str, DWDObservationResolution],
    period: Union[str, DWDObservationPeriod] = None,
    start_date: Union[None, str, Timestamp] = None,
    end_date: Union[None, str, Timestamp] = None,
):
    """
    Validate and store a parameter-set / resolution / period request.

    :param parameter_set: parameter set str/enumeration
    :param resolution: resolution str/enumeration
    :param period: period str/enumeration
    :param start_date: start date to limit the stations
    :param end_date: end date to limit the stations
    """
    super().__init__(start_date=start_date, end_date=end_date)

    parsed_set = parse_enumeration_from_template(
        parameter_set, DWDObservationParameterSet
    )
    parsed_resolution = parse_enumeration_from_template(
        resolution, DWDObservationResolution
    )
    parsed_period = parse_enumeration_from_template(period, DWDObservationPeriod)

    # Reject combinations the DWD server does not offer.
    if not check_dwd_observations_parameter_set(
        parsed_set, parsed_resolution, parsed_period
    ):
        raise InvalidParameterCombination(
            f"The combination of {parsed_set.value}, {parsed_resolution.value}, "
            f"{parsed_period.value} is invalid."
        )

    self.parameter = parsed_set
    self.resolution = parsed_resolution
    self.period = parsed_period
def dwd_sites(
    product: str,
    parameter: str = Query(default=None),
    resolution: str = Query(default=None),
    period: str = Query(default=None),
    lon: float = Query(default=None),
    lat: float = Query(default=None),
    number_nearby: int = Query(default=None),
    max_distance_in_km: int = Query(default=None),
    sql: str = Query(default=None),
):
    """
    Acquire DWD station sites, optionally filtered by geoposition.

    :param product: either "observations" or "mosmix"
    :param parameter: Observation measure (observations only)
    :param resolution: Frequency/granularity of measurement interval (observations only)
    :param period: Recent or historical files (observations only)
    :param lon: longitude used for geo filtering
    :param lat: latitude used for geo filtering
    :param number_nearby: number of stations to select around (lat, lon)
    :param max_distance_in_km: radius around (lat, lon) to select stations from
    :param sql: SQL expression applied to the result
    :return: JSON response with the (possibly filtered) station list
    """
    if product not in ["observations", "mosmix"]:
        # BUG FIX: the exception was previously *returned*, which FastAPI
        # serializes with status 200 instead of answering with a 404.
        raise HTTPException(status_code=404, detail=f"product {product} not found")

    # Data acquisition.
    if product == "observations":
        if parameter is None or resolution is None or period is None:
            raise HTTPException(
                status_code=400,
                detail="Query arguments 'parameter', 'resolution' "
                "and 'period' are required",
            )
        parameter = parse_enumeration_from_template(
            parameter, DWDObservationParameterSet
        )
        resolution = parse_enumeration_from_template(
            resolution, DWDObservationResolution
        )
        period = parse_enumeration_from_template(period, DWDObservationPeriod)

        sites = DWDObservationStations(
            parameter_set=parameter,
            resolution=resolution,
            period=period,
        )
    else:
        sites = DWDMosmixStations()

    # BUG FIX: explicit None checks so valid zero coordinates (equator /
    # prime meridian) are not rejected by truthiness.
    if lon is not None and lat is not None and (number_nearby or max_distance_in_km):
        if number_nearby:
            df = sites.nearby_number(
                latitude=lat, longitude=lon, num_stations_nearby=number_nearby
            )
        else:
            df = sites.nearby_radius(
                latitude=lat, longitude=lon, max_distance_in_km=max_distance_in_km
            )
    else:
        df = sites.all()

    # Postprocessing.
    df = df.dwd.lower()
    if sql is not None:
        df = df.io.sql(sql)

    return make_json_response(df.fillna(-999).io.to_dict())
def test_parse_enumeration_from_template():
    """Canonical name and DWD folder alias both resolve; garbage raises."""
    expected = DWDObservationParameterSet.CLIMATE_SUMMARY

    assert (
        parse_enumeration_from_template("climate_summary", DWDObservationParameterSet)
        == expected
    )
    assert (
        parse_enumeration_from_template("kl", DWDObservationParameterSet) == expected
    )

    with pytest.raises(InvalidEnumeration):
        parse_enumeration_from_template("climate", DWDObservationParameterSet)
def _build_complete_df(self, df: pd.DataFrame, station_id: str, parameter: Enum, dataset: Enum) -> pd.DataFrame:
    """Method to build a complete df with all dates from start to end date included.

    :param df: DataFrame with the data acquired for the station
    :param station_id: id of the station the data belongs to
    :param parameter: parameter enum (equal to ``dataset`` when a whole dataset
        was requested)
    :param dataset: dataset enum the data belongs to
    :return: DataFrame left-joined onto the full date range of the request
    """
    if parameter != dataset or not self.sr.tidy:
        # Single parameter (or non-tidy data): one left-join onto the base
        # date range is enough.
        base_df = self._get_base_df(station_id)

        df = pd.merge(
            left=base_df,
            right=df,
            left_on=Columns.DATE.value,
            right_on=Columns.DATE.value,
            how="left",
        )

        if self.sr.tidy:
            df.loc[:, Columns.PARAMETER.value] = parameter.value
            # Store the repeated parameter label as a categorical.
            df.loc[:, Columns.PARAMETER.value] = pd.Categorical(
                df.loc[:, Columns.PARAMETER.value]
            )

        return df
    else:
        # Whole tidy dataset: reindex each parameter group onto the base date
        # range separately, then stack the groups back together.
        data = []

        for parameter, group in df.groupby(Columns.PARAMETER.value, sort=False):
            # Resolve the group's parameter label back to its enum; the lookup
            # path depends on whether the service has one unique dataset.
            if self.sr._unique_dataset:
                parameter_ = parse_enumeration_from_template(
                    parameter,
                    self.sr._parameter_base[self.sr.resolution.name],
                )
            else:
                parameter_ = parse_enumeration_from_template(
                    parameter,
                    self.sr._parameter_base[self.sr._dataset_accessor][dataset.name],
                )

            df = pd.merge(
                left=self._get_base_df(station_id),
                right=group,
                left_on=Columns.DATE.value,
                right_on=Columns.DATE.value,
                how="left",
            )

            df[Columns.PARAMETER.value] = parameter_.value.lower()

            data.append(df)

        return pd.concat(data)
def test_parse_enumeration_from_template():
    """Lower-case name, upper-case name and DWD alias all resolve; garbage raises."""
    expected = DwdObservationDataset.CLIMATE_SUMMARY

    for query in ("climate_summary", "CLIMATE_SUMMARY", "kl"):
        assert parse_enumeration_from_template(query, DwdObservationDataset) == expected

    with pytest.raises(InvalidEnumeration):
        parse_enumeration_from_template("climate", DwdObservationDataset)
def _parse_parameter_and_dataset(self, parameter) -> Tuple[Enum, Enum]:
    """Try to parse dataset first e.g. when
    "climate_summary" or
    "precipitation_height", "climate_summary" is requested

    :param parameter: parameter name or value to look up
    :return: tuple of (parameter enum, dataset enum); (None, None) if unmatched
    """
    # Flat view of all parameter enum members for the current resolution.
    candidates = {
        entry
        for entry in self._parameter_base[self._dataset_accessor]
        if hasattr(entry, "name")
    }

    query = parameter.lower()
    parameter_ = next(
        (
            entry
            for entry in candidates
            if entry.name.lower() == query or entry.value.lower() == query
        ),
        None,
    )

    dataset_ = None
    if parameter_:
        # The enum class name identifies the dataset the parameter lives in.
        dataset_ = parse_enumeration_from_template(
            parameter_.__class__.__name__, self._dataset_base
        )

    return parameter_, dataset_
def __init__(
    self,
    parameter: Optional[Tuple[Union[str, DwdMosmixParameter], ...]],
    mosmix_type: Union[str, DwdMosmixType],
    start_issue: Optional[Union[str, datetime, DwdForecastDate]] = DwdForecastDate.LATEST,
    end_issue: Optional[Union[str, datetime]] = None,
    start_date: Optional[Union[str, datetime]] = None,
    end_date: Optional[Union[str, datetime]] = None,
    humanize: bool = True,
    tidy: bool = True,
    si_units: bool = True,
) -> None:
    """
    :param parameter: parameter(s) to be collected
    :param mosmix_type: mosmix type, either small or large
    :param start_issue: start of the mosmix issue range (model run time)
    :param end_issue: end of the mosmix issue range
    :param start_date: start date for filtering the returned data
    :param end_date: end date for filtering the returned data
    :param humanize: whether to humanize parameter names
    :param tidy: whether to tidy (reshape) the resulting data
    :param si_units: whether to convert values to SI units
    """
    self.mosmix_type = parse_enumeration_from_template(
        mosmix_type, DwdMosmixType)

    super().__init__(
        parameter=parameter,
        start_date=start_date,
        end_date=end_date,
        resolution=Resolution.HOURLY,
        period=Period.FUTURE,
        si_units=si_units,
    )

    # Parse issue date if not set to fixed "latest" string
    if start_issue is DwdForecastDate.LATEST and end_issue:
        log.info(
            "end_issue will be ignored as 'latest' was selected for issue date"
        )

    if start_issue is not DwdForecastDate.LATEST:
        # Fill a half-open issue range from the side that is given.
        if not start_issue and not end_issue:
            start_issue = DwdForecastDate.LATEST
        elif not end_issue:
            end_issue = start_issue
        elif not start_issue:
            start_issue = end_issue

        # Normalize to full hours.
        start_issue = pd.to_datetime(
            start_issue, infer_datetime_format=True).floor("1H")
        end_issue = pd.to_datetime(end_issue,
                                   infer_datetime_format=True).floor("1H")

        # Shift start date and end date to 3, 9, 15, 21 hour format
        if mosmix_type == DwdMosmixType.LARGE:
            start_issue = self.adjust_datetime(start_issue)
            end_issue = self.adjust_datetime(end_issue)

    # TODO: this should be replaced by the freq property in the main class
    if self.mosmix_type == DwdMosmixType.SMALL:
        self.resolution = Resolution.HOURLY
    else:
        self.resolution = Resolution.HOUR_6

    self.start_issue = start_issue
    self.end_issue = end_issue
    self.humanize = humanize
    self.tidy = tidy
def describe_fields(cls, dataset, resolution, period, language: str = "en") -> dict:
    """
    Read the DWD field description document for a dataset/resolution/period.

    :param dataset: dataset to describe
    :param resolution: resolution of the dataset
    :param period: period of the dataset
    :param language: description language, either "en" or "de"
    :return: dictionary with the field descriptions
    """
    from wetterdienst.provider.dwd.observation.fields import read_description

    dataset = parse_enumeration_from_template(dataset, DwdObservationDataset)
    resolution = parse_enumeration_from_template(
        resolution, cls._resolution_base, Resolution
    )
    period = parse_enumeration_from_template(period, cls._period_base, Period)

    file_index = _create_file_index_for_dwd_server(
        dataset=dataset,
        resolution=resolution,
        period=period,
        cdc_base=DWDCDCBase.CLIMATE_OBSERVATIONS,
    )

    # Description files on the server are prefixed by language.
    prefixes = {"en": "DESCRIPTION_", "de": "BESCHREIBUNG_"}
    try:
        file_prefix = prefixes[language]
    except KeyError:
        raise ValueError("Only language 'en' or 'de' supported")

    file_index = file_index[
        file_index[DwdColumns.FILENAME.value].str.contains(file_prefix)
    ]
    description_file_url = str(file_index[DwdColumns.FILENAME.value].tolist()[0])
    log.info(f"Acquiring field information from {description_file_url}")

    return read_description(description_file_url, language=language)
def __new__(cls, provider: Union[Provider, str], kind: Union[Kind, str]):
    """
    Resolve the API class registered for a provider/kind combination.

    :param provider: provider of data e.g. DWD
    :param kind: kind of the data e.g. observation
    """
    # Both lookups raise InvalidEnumeration for unknown strings.
    provider_ = parse_enumeration_from_template(provider, Provider)
    kind_ = parse_enumeration_from_template(kind, Kind)

    api = cls.endpoints.get(provider_, {}).get(kind_)
    if api:
        return api

    raise ValueError(
        f"No API available for provider {provider_.value} and kind {kind_.value}"
    )
def resolution(self, res) -> None:
    # TODO: add functionality to parse arbitrary resolutions for cases where
    # resolution has to be determined based on returned data
    if self._resolution_type in (ResolutionType.FIXED, ResolutionType.UNDEFINED):
        # Fixed/undefined resolutions are stored verbatim.
        self._resolution = res
        return
    self._resolution = parse_enumeration_from_template(
        res, self._resolution_base, Resolution
    )
def _parse_dataset_and_parameter(
    self, parameter, dataset
) -> Tuple[Optional[Enum], Optional[Enum]]:
    """
    Parse parameters for cases like
        - parameter=("climate_summary", ) or
        - parameter=(("precipitation_height", "climate_summary"))

    :param parameter: parameter name, possibly equal to ``dataset``
    :param dataset: dataset name the parameter belongs to
    :return: tuple of (parameter enum, dataset enum); (None, None) on failure
    """
    dataset_ = None
    try:
        dataset_ = parse_enumeration_from_template(dataset, self._dataset_base)
    except InvalidEnumeration:
        pass

    if not dataset_:
        return None, None

    # Check that the dataset actually exists for this resolution.
    if self._has_datasets and not self._unique_dataset:
        try:
            self._parameter_base[self._dataset_accessor][dataset_.name]
        except (KeyError, AttributeError):
            log.warning(
                f"dataset {dataset_.name} is not a valid dataset for resolution {self._dataset_accessor}"
            )
            return None, None

    # Case 1: entire dataset e.g. parameter="climate_summary"
    if parameter == dataset:
        return dataset_, dataset_

    # Case 2: dataset and parameter e.g. (precipitation_height, climate_summary)
    parameter_ = None
    try:
        parameter_ = parse_enumeration_from_template(
            parameter, self._parameter_base[self._dataset_accessor][dataset_.name]
        )
    except (InvalidEnumeration, TypeError):
        pass

    return parameter_, dataset_
def create_parameter_to_parameter_set_combination(
    parameter: Union[DWDObservationParameter, DWDObservationParameterSet],
    resolution: DWDObservationResolution,
) -> Tuple[
    Union[DWDObservationParameter, DWDObservationParameterSet],
    DWDObservationParameterSet,
]:
    """Map a requested parameter to the parameter set that contains it; the set
    has to be downloaded first so the single parameter can be extracted."""
    # Collect the per-resolution parameter-set enum classes (skip dunders).
    set_structures = [
        member
        for name, member in DWDObservationParameterSetStructure[
            resolution.name
        ].__dict__.items()
        if not name.startswith("_")
    ]

    for structure in set_structures:
        try:
            parameter_ = parse_enumeration_from_template(
                parameter,
                DWDObservationParameterSetStructure[resolution.name][
                    structure.__name__
                ],
            )
            parameter_set = parse_enumeration_from_template(
                structure.__name__, DWDObservationParameterSet
            )
            return parameter_, parameter_set
        except InvalidEnumeration:
            continue

    # Fall back: the argument may itself name an entire parameter set.
    try:
        parameter_set = parse_enumeration_from_template(
            parameter, DWDObservationParameterSet
        )
        return parameter_set, parameter_set
    except InvalidEnumeration:
        pass

    raise InvalidParameter(
        f"parameter {parameter} could not be parsed for "
        f"time resolution {resolution}"
    )
def __new__(cls, provider: str, network: str):
    """
    Resolve the API class registered for a provider/network combination.

    :param provider: provider of data e.g. DWD
    :param network: data network e.g. NOAAs ghcn
    """
    api = None
    try:
        provider_ = parse_enumeration_from_template(provider, Provider)
        api = cls.endpoints[provider_.name][network.upper()].value
    except (InvalidEnumeration, KeyError):
        pass

    if not api:
        raise ProviderError(
            f"No API available for provider {provider} and network {network}"
        )

    return api
def test_compare_available_dwd_datasets():
    """Compare the datasets made available through wetterdienst with the ones
    actually available on the DWD CDC server instance."""
    # Similar to list_remote_files_fsspec, but without walking the full depth.
    fs = HTTPFileSystem(
        use_listings_cache=True,
        listings_expiry_time=CacheExpiry.TWELVE_HOURS.value,
        listings_cache_type="filedircache",
        listings_cache_location=cache_dir,
        client_kwargs=FSSPEC_CLIENT_KWARGS,
    )
    base_url = "https://opendata.dwd.de/climate_environment/CDC/observations_germany/climate/"
    remote_paths = fs.expand_path(base_url, recursive=True, maxdepth=3)
    df = pd.DataFrame({"files": remote_paths})
    # Strip the base url and the trailing slash.
    df.files = df.files.str[len(base_url):-1]
    # Keep only "<resolution>/<dataset>" folders (exactly one slash).
    df = df.loc[df.files.str.count("/") == 1, :]
    df.loc[:, ["resolution", "dataset"]] = df.pop("files").str.split("/").tolist()

    for _, (resolution, dataset) in df.iterrows():
        if (resolution, dataset) in SKIP_DATASETS:
            continue
        resolution = parse_enumeration_from_template(
            resolution, DwdObservationResolution, Resolution
        )
        dataset = DwdObservationDataset(dataset)
        assert dataset in RESOLUTION_DATASET_MAPPING[resolution].keys()
        assert DwdObservationParameter[resolution.name][dataset.name]
def __init__(
    self,
    station_ids: List[Union[int, str]],
    parameters: List[
        Union[str, DWDObservationParameter, DWDObservationParameterSet]
    ],
    resolution: Union[str, DWDObservationResolution],
    periods: Optional[List[Union[str, DWDObservationPeriod]]] = None,
    start_date: Union[None, str, Timestamp, datetime] = None,
    end_date: Union[None, str, Timestamp, datetime] = None,
    storage: StorageAdapter = None,
    tidy_data: bool = True,
    humanize_column_names: bool = False,
) -> None:
    """
    Class with mostly flexible arguments to define a request regarding DWD data.
    Special handling for period type. If start_date/end_date are given all period
    types are considered and merged together and the data is filtered for the
    given dates afterwards.

    :param station_ids: definition of stations by str, int or list of str/int,
                        will be parsed to list of int
    :param parameters:  Observation measure
    :param resolution:  Frequency/granularity of measurement interval
    :param periods:     Recent or historical files (optional), if None and
                        start_date and end_date None, all period types are used
    :param start_date:  Replacement for period type to define exact time
                        of requested data, if used, period type will be set to all
                        period types (hist, recent, now)
    :param end_date:    Replacement for period type to define exact time
                        of requested data, if used, period type will be set to all
                        period types (hist, recent, now)
    :param storage:     Storage adapter.
    :param tidy_data:   Reshape DataFrame to a more tidy
                        and row-based version of data
    :param humanize_column_names: Replace column names by more meaningful ones
    """
    try:
        # Station ids are normalized to plain integers.
        self.station_ids = pd.Series(station_ids).astype(int).tolist()
    except ValueError:
        raise ValueError("List of station id's can not be parsed to integers.")

    self.resolution = parse_enumeration_from_template(
        resolution, DWDObservationResolution
    )

    # Resolve each requested parameter to its (parameter, parameter_set) pair;
    # unparseable parameters are logged and skipped.
    self.parameters = []
    for parameter in pd.Series(parameters):
        try:
            (
                parameter,
                parameter_set,
            ) = create_parameter_to_parameter_set_combination(
                parameter, self.resolution
            )
            self.parameters.append((parameter, parameter_set))
        except InvalidParameter as e:
            log.info(str(e))

    if not self.parameters:
        raise NoParametersFound(f"No parameters could be parsed from {parameters}")

    # If any date is given, use all period types and filter, else if not period type
    # is given use all period types
    if start_date or end_date or not periods:
        self.periods = [*DWDObservationPeriod]
    # Otherwise period types will be parsed
    else:
        # For the case that a period_type is given, parse the period type(s)
        self.periods = (
            pd.Series(periods)
            .apply(parse_enumeration_from_template, args=(DWDObservationPeriod,))
            .sort_values()
            .tolist()
        )

    if start_date or end_date:
        # If only one date given, make the other one equal
        if not start_date:
            start_date = end_date
        if not end_date:
            end_date = start_date

        self.start_date = Timestamp(dateparser.parse(str(start_date)))
        self.end_date = Timestamp(dateparser.parse(str(end_date)))

        if not self.start_date <= self.end_date:
            raise StartDateEndDateError(
                "Error: 'start_date' must be smaller or equal to 'end_date'."
            )
    else:
        self.start_date = start_date
        self.end_date = end_date

    self.storage = storage

    # If more then one parameter requested, automatically tidy data
    self.tidy_data = (
        len(self.parameters) > 1
        or any(
            [
                not isinstance(parameter, DWDObservationParameterSet)
                for parameter, parameter_set in self.parameters
            ]
        )
        or tidy_data
    )
    self.humanize_column_names = humanize_column_names
def dwd_readings(
    product: str,
    station: str = Query(default=None),
    parameter: str = Query(default=None),
    resolution: str = Query(default=None),
    period: str = Query(default=None),
    mosmix_type: str = Query(default=None, alias="mosmix-type"),
    date: str = Query(default=None),
    sql: str = Query(default=None),
):
    """
    Acquire data from DWD.

    # TODO: Obtain lat/lon distance/number information.

    :param product: string for product, either observations or mosmix
    :param station: Comma-separated list of station identifiers.
    :param parameter: Observation measure
    :param resolution: Frequency/granularity of measurement interval
    :param period: Recent or historical files
    :param mosmix_type: type of mosmix, either small or large
    :param date: Date or date range
    :param sql: SQL expression
    :return: JSON response with the requested readings
    """
    if product not in ["observations", "mosmix"]:
        # BUG FIX: the exception was previously *returned*, which FastAPI
        # serializes with status 200 instead of answering with a 404.
        raise HTTPException(status_code=404, detail=f"product {product} not found")

    if station is None:
        raise HTTPException(
            status_code=400, detail="Query argument 'station' is required"
        )

    station_ids = map(str, read_list(station))

    if product == "observations":
        if parameter is None or resolution is None or period is None:
            raise HTTPException(
                status_code=400,
                detail="Query arguments 'parameter', 'resolution' "
                "and 'period' are required",
            )
        parameter = parse_enumeration_from_template(
            parameter, DWDObservationParameterSet
        )
        resolution = parse_enumeration_from_template(
            resolution, DWDObservationResolution
        )
        period = parse_enumeration_from_template(period, DWDObservationPeriod)

        # Data acquisition.
        readings = DWDObservationData(
            station_ids=station_ids,
            parameters=parameter,
            resolution=resolution,
            periods=period,
            tidy_data=True,
            humanize_parameters=True,
        )
    else:
        if mosmix_type is None:
            raise HTTPException(
                status_code=400, detail="Query argument 'mosmix_type' is required"
            )
        mosmix_type = parse_enumeration_from_template(mosmix_type, DWDMosmixType)
        readings = DWDMosmixData(station_ids=station_ids, mosmix_type=mosmix_type)

    # Postprocessing.
    df = readings.all()

    if date is not None:
        df = df.dwd.filter_by_date(date, resolution)

    df = df.dwd.lower()

    if sql is not None:
        df = df.io.sql(sql)

    data = json.loads(df.to_json(orient="records", date_format="iso"))
    return make_json_response(data)
def __init__(
    self,
    parameter: Union[str, DWDRadarParameter],
    site: Optional[DWDRadarSite] = None,
    fmt: Optional[DWDRadarDataFormat] = None,
    subset: Optional[DWDRadarDataSubset] = None,
    elevation: Optional[int] = None,
    start_date: Optional[Union[str, datetime, DWDRadarDate]] = None,
    end_date: Optional[Union[str, datetime, timedelta]] = None,
    resolution: Optional[Union[str, DWDRadarResolution]] = None,
    period: Optional[Union[str, DWDRadarPeriod]] = None,
) -> None:
    """
    :param parameter:       The radar moment to request
    :param site:            Site/station if parameter is one of
                            RADAR_PARAMETERS_SITES
    :param fmt:             Data format (BINARY, BUFR, HDF5)
    :param subset:          The subset (simple or polarimetric) for HDF5 data.
    :param elevation:       Elevation level, only valid for sweep parameters
    :param start_date:      Start date
    :param end_date:        End date
    :param resolution:      Time resolution for RadarParameter.RADOLAN_CDC,
                            either daily or hourly or 5 minutes.
    :param period:          Period type for RadarParameter.RADOLAN_CDC
    :return:                Nothing for now.
    """
    # Convert parameters to enum types.
    self.parameter = parse_enumeration_from_template(parameter, DWDRadarParameter)
    self.site = parse_enumeration_from_template(site, DWDRadarSite)
    self.format = parse_enumeration_from_template(fmt, DWDRadarDataFormat)
    self.subset = parse_enumeration_from_template(subset, DWDRadarDataSubset)
    self.elevation = elevation and int(elevation)
    self.resolution = parse_enumeration_from_template(
        resolution, DWDRadarResolution
    )
    self.period = parse_enumeration_from_template(period, DWDRadarPeriod)

    # Sanity checks.
    # BUG FIX: compare against the parsed enum attributes (self.*) instead of
    # the raw arguments, so str inputs (allowed by the signature) validate the
    # same way as enum inputs.
    if self.parameter == DWDRadarParameter.RADOLAN_CDC:
        if self.resolution not in (
            DWDRadarResolution.DAILY,
            DWDRadarResolution.HOURLY,
            DWDRadarResolution.MINUTE_5,
        ):
            raise ValueError(
                "RADOLAN_CDC only supports daily, hourly and 5 minutes resolutions"
            )

    elevation_parameters = [
        DWDRadarParameter.SWEEP_VOL_VELOCITY_H,
        DWDRadarParameter.SWEEP_VOL_REFLECTIVITY_H,
    ]
    if self.elevation is not None and self.parameter not in elevation_parameters:
        raise ValueError(
            f"Argument 'elevation' only valid for parameter={elevation_parameters}"
        )

    if start_date == DWDRadarDate.LATEST:
        # HDF5 folders do not have "-latest-" files.
        if self.parameter == DWDRadarParameter.RADOLAN_CDC:
            raise ValueError("RADOLAN_CDC data has no '-latest-' files")

        # HDF5 folders do not have "-latest-" files.
        if self.format == DWDRadarDataFormat.HDF5:
            raise ValueError("HDF5 data has no '-latest-' files")

    if start_date == DWDRadarDate.CURRENT and not self.period:
        self.period = DWDRadarPeriod.RECENT

    # Evaluate "RadarDate.MOST_RECENT" for "start_date".
    #
    # HDF5 folders do not have "-latest-" files, so we will have to synthesize them
    # appropriately by going back to the second last volume of 5 minute intervals.
    #
    # The reason for this is that when requesting sweep data in HDF5 format at
    # e.g. 15:03, not all files will be available on the DWD data repository for
    # the whole volume (e.g. covering all elevation levels) within the time range
    # of 15:00-15:04:59 as they apparently will be added incrementally while the
    # scan is performed.
    #
    # So, we will be better off making the machinery retrieve the latest "full"
    # volume by addressing the **previous** volume. So, when requesting data at
    # 15:03, it will retrieve 14:55:00-14:59:59.
    #
    if self.format == DWDRadarDataFormat.HDF5 and start_date == DWDRadarDate.MOST_RECENT:
        start_date = datetime.utcnow() - timedelta(minutes=5)
        end_date = None

    if (
        start_date == DWDRadarDate.MOST_RECENT
        and self.parameter == DWDRadarParameter.RADOLAN_CDC
    ):
        start_date = datetime.utcnow() - timedelta(minutes=50)
        end_date = None

    # Evaluate "RadarDate.CURRENT" for "start_date".
    if start_date == DWDRadarDate.CURRENT:
        start_date = datetime.utcnow()
        end_date = None

    # Evaluate "RadarDate.LATEST" for "start_date".
    if start_date == DWDRadarDate.LATEST:
        self.start_date = start_date
        self.end_date = None
    # Evaluate any datetime for "start_date".
    else:
        self.start_date = pd.to_datetime(start_date, infer_datetime_format=True)
        self.end_date = end_date
        self.adjust_datetimes()

    log.info(
        f"DWDRadarRequest with {self.parameter}, {self.site}, "
        f"{self.format}, {self.resolution} "
        f"for {self.start_date}/{self.end_date}"
    )
def dwd_readings(
    station: str = Query(default=None),
    parameter: str = Query(default=None),
    resolution: str = Query(default=None),
    period: str = Query(default=None),
    date: str = Query(default=None),
    sql: str = Query(default=None),
):
    """
    Acquire data from DWD.

    # TODO: Obtain lat/lon distance/number information.

    :param station: Comma-separated list of station identifiers.
    :param parameter: Observation measure
    :param resolution: Frequency/granularity of measurement interval
    :param period: Recent or historical files
    :param date: Date or date range
    :param sql: SQL expression
    :return:
    """
    # Input validation.
    if station is None:
        raise HTTPException(
            status_code=400, detail="Query argument 'station' is required"
        )
    if parameter is None or resolution is None or period is None:
        raise HTTPException(
            status_code=400,
            detail="Query arguments 'parameter', 'resolution' "
            "and 'period' are required",
        )

    station_ids = map(int, read_list(station))
    parameter = parse_enumeration_from_template(parameter, DWDObservationParameterSet)
    resolution = parse_enumeration_from_template(resolution, DWDObservationResolution)
    period = parse_enumeration_from_template(period, DWDObservationPeriod)

    # Data acquisition.
    observations = DWDObservationData(
        station_ids=station_ids,
        parameters=parameter,
        resolution=resolution,
        periods=period,
        tidy_data=True,
        humanize_column_names=True,
    )

    # Postprocessing.
    df = observations.collect_safe()
    if date is not None:
        df = df.dwd.filter_by_date(date, resolution)
    df = df.dwd.lower()
    if sql is not None:
        df = df.io.sql(sql)

    data = json.loads(df.to_json(orient="records", date_format="iso"))
    return make_json_response(data)
def __init__(
    self,
    parameter: Tuple[Union[str, Enum]],
    resolution: Resolution,
    period: Period,
    start_date: Optional[Union[str, datetime, pd.Timestamp]] = None,
    end_date: Optional[Union[str, datetime, pd.Timestamp]] = None,
) -> None:
    """
    :param parameter: requested parameter(s)
    :param resolution: requested resolution
    :param period: requested period(s)
    :param start_date: Start date for filtering stations_result for their available data
    :param end_date: End date for filtering stations_result for their available data
    """
    # Copy of the global Settings taken at request time.
    settings = copy(Settings)

    super().__init__()

    self.resolution = parse_enumeration_from_template(
        resolution, self._resolution_base, Resolution)
    self.period = self._parse_period(period)
    self.start_date, self.end_date = self.convert_timestamps(
        start_date, end_date)
    self.parameter = self._parse_parameter(parameter)

    if not self.parameter:
        raise NoParametersFound(
            "no valid parameters could be parsed from given argument")

    self.humanize = settings.humanize

    tidy = settings.tidy
    if self._has_datasets:
        # Force tidy format whenever a requested parameter is not a full dataset.
        tidy = tidy or any([
            parameter not in self._dataset_base
            for parameter, dataset in self.parameter
        ])
    self.tidy = tidy

    self.si_units = settings.si_units

    # skip empty stations — only takes effect with tidy data
    self.skip_empty = self.tidy and settings.skip_empty
    self.skip_threshold = settings.skip_threshold
    self.dropna = self.tidy and settings.dropna

    if not tidy and settings.skip_empty:
        log.warning(
            "option 'skip_empty' is only available with option 'tidy' and is thus ignored in this request."
        )

    if not tidy and settings.dropna:
        log.warning(
            "option 'dropna' is only available with option 'tidy' and is thus ignored in this request."
        )

    # optional attribute for dynamic resolutions
    if self.resolution == Resolution.DYNAMIC:
        self._dynamic_frequency = None

    log.info(f"Processing request {self.__repr__()}")
def dynamic_frequency(self, df) -> None:
    # Only overwrite the stored frequency for a truthy value.
    if not df:
        return
    self._dynamic_frequency = parse_enumeration_from_template(df, Frequency)
def __init__(
    self,
    parameter: Optional[Tuple[Union[str, DwdMosmixParameter], ...]],
    mosmix_type: Union[str, DwdMosmixType],
    start_issue: Optional[Union[str, datetime, DwdForecastDate]] = DwdForecastDate.LATEST,
    end_issue: Optional[Union[str, datetime]] = None,
    start_date: Optional[Union[str, datetime]] = None,
    end_date: Optional[Union[str, datetime]] = None,
) -> None:
    """
    :param parameter: parameter(s) to be collected
    :param mosmix_type: mosmix type, either small or large
    :param start_issue: start of issue of mosmix which should be caught
        (Mosmix run at time XX:YY)
    :param end_issue: end of issue
    :param start_date: start date for filtering returned dataframe
    :param end_date: end date
    """
    self.mosmix_type = parse_enumeration_from_template(
        mosmix_type, DwdMosmixType)

    super().__init__(
        parameter=parameter,
        start_date=start_date,
        end_date=end_date,
        resolution=Resolution.HOURLY,
        period=Period.FUTURE,
    )

    if not start_issue:
        start_issue = DwdForecastDate.LATEST

    # Allow the string "latest" to be passed in place of the enum member.
    try:
        start_issue = parse_enumeration_from_template(
            start_issue, DwdForecastDate)
    except InvalidParameter:
        pass

    # Parse issue date if not set to fixed "latest" string
    if start_issue is DwdForecastDate.LATEST and end_issue:
        log.info(
            "end_issue will be ignored as 'latest' was selected for issue date"
        )

    if start_issue is not DwdForecastDate.LATEST:
        # Fill a half-open issue range from the side that is given.
        if not start_issue and not end_issue:
            start_issue = DwdForecastDate.LATEST
        elif not end_issue:
            end_issue = start_issue
        elif not start_issue:
            start_issue = end_issue

        # Normalize to full hours.
        start_issue = pd.to_datetime(
            start_issue, infer_datetime_format=True).floor("1H")
        end_issue = pd.to_datetime(end_issue,
                                   infer_datetime_format=True).floor("1H")

        # Shift start date and end date to 3, 9, 15, 21 hour format
        if mosmix_type == DwdMosmixType.LARGE:
            start_issue = self.adjust_datetime(start_issue)
            end_issue = self.adjust_datetime(end_issue)

    # TODO: this should be replaced by the freq property in the main class
    if self.mosmix_type == DwdMosmixType.SMALL:
        self.resolution = Resolution.HOURLY
    else:
        self.resolution = Resolution.HOUR_6

    self.start_issue = start_issue
    self.end_issue = end_issue
def get_stations(
    api,
    parameter: List[str],
    resolution: str,
    period: List[str],
    date: Optional[str],
    issue: str,
    all_,
    station_id: List[str],
    name: str,
    coordinates: str,
    rank: int,
    distance: float,
    bbox: str,
    sql: str,
    si_units: bool,
    tidy: bool,
    humanize: bool,
    skip_empty: bool,
    skip_threshold: float,
    dropna: bool,
) -> StationsResult:
    """
    Core function for querying stations_result via cli and restapi.

    Exactly one station-selection argument (all_/station_id/name/
    coordinates+rank/coordinates+distance/bbox/sql) must be given,
    otherwise a KeyError is raised.

    :param api: request class of the weather service to be queried
    :param parameter: requested parameter(s)/dataset(s)
    :param resolution: requested resolution (doubles as mosmix type for DWD forecast)
    :param period: requested period(s), only used for multi-period services
    :param date: date or date range string, split into start_date/end_date
    :param issue: issue date, only used for DWD forecast (Mosmix)
    :param all_: if truthy, return all stations
    :param station_id: filter stations by ids
    :param name: filter stations by name
    :param coordinates: "lat,lon" string, combined with rank or distance
    :param rank: number of nearest stations around coordinates
    :param distance: maximum distance around coordinates
    :param bbox: bounding box "left,bottom,right,top"
    :param sql: sql query applied to the stations
    :param si_units: convert values to si units
    :param tidy: tidy the resulting dataframe
    :param humanize: humanize parameter names
    :param skip_empty: skip stations with too little data
    :param skip_threshold: threshold used together with skip_empty
    :param dropna: drop nan values
    :return: StationsResult with the filtered stations
    """
    # TODO: move this into Request core
    start_date, end_date = None, None
    if date:
        # TODO: use rather network here
        # DWD Mosmix encodes its dataset ("small"/"large") in the resolution
        # argument, so the actual time resolution must be derived from it.
        if api == DwdMosmixRequest:
            mosmix_type = DwdMosmixType[resolution.upper()]

            if mosmix_type == DwdMosmixType.SMALL:
                res = Resolution.HOURLY
            else:
                res = Resolution.HOUR_6
        else:
            res = parse_enumeration_from_template(resolution, api._resolution_base, Resolution)

        # Split date string into start and end date string
        start_date, end_date = create_date_range(date=date, resolution=res)

    if api._data_range == DataRange.LOOSELY and not start_date and not end_date:
        # TODO: use another property "network" on each class
        raise TypeError(
            f"Combination of provider {api.provider.name} and network {api.kind.name} requires start and end date"
        )

    # Todo: We may have to apply other measures to allow for
    #  different request initializations
    # DWD Mosmix has fixed resolution and rather uses SMALL
    # and large for the different datasets
    kwargs = {
        "parameter": unpack_parameters(parameter),
        "start_date": start_date,
        "end_date": end_date,
    }

    if api.provider == Provider.DWD and api.kind == Kind.FORECAST:
        kwargs["mosmix_type"] = resolution
        kwargs["start_issue"] = issue
    elif api._resolution_type == ResolutionType.MULTI:
        kwargs["resolution"] = resolution

    if api._period_type == PeriodType.MULTI:
        kwargs["period"] = period

    # Settings is a class-level context manager: values set inside the block
    # apply while the request object is created and are restored afterwards.
    with Settings:
        Settings.tidy = tidy
        Settings.humanize = humanize
        Settings.si_units = si_units
        Settings.skip_empty = skip_empty
        Settings.skip_threshold = skip_threshold
        Settings.dropna = dropna

        r = api(**kwargs)

    if all_:
        return r.all()
    elif station_id:
        return r.filter_by_station_id(station_id)
    elif name:
        return r.filter_by_name(name)
    # Use coordinates twice in main if-elif to get same KeyError
    elif coordinates and rank:
        lat, lon = coordinates.split(",")

        return r.filter_by_rank(
            latitude=float(lat),
            longitude=float(lon),
            rank=rank,
        )
    elif coordinates and distance:
        lat, lon = coordinates.split(",")

        return r.filter_by_distance(
            latitude=float(lat),
            longitude=float(lon),
            distance=distance,
        )
    elif bbox:
        try:
            left, bottom, right, top = bbox.split(",")
        except ValueError as e:
            raise ValueError("bbox requires four floats separated by comma") from e

        return r.filter_by_bbox(
            left=float(left),
            bottom=float(bottom),
            right=float(right),
            top=float(top),
        )
    elif sql:
        return r.filter_by_sql(sql)
    else:
        param_options = [
            "all (boolean)",
            "station (string)",
            "name (string)",
            "coordinates (float,float) and rank (integer)",
            "coordinates (float,float) and distance (float)",
            "bbox (left float, bottom float, right float, top float)",
        ]
        raise KeyError(
            f"Give one of the parameters: {', '.join(param_options)}")
def __init__(
    self,
    station_ids: Tuple[str],
    mosmix_type: Union[str, DWDMosmixType],
    parameters: Optional[Tuple[Union[str, DWDMosmixParameter]]] = None,
    start_issue: Optional[
        Union[str, datetime, DWDForecastDate]
    ] = DWDForecastDate.LATEST,
    end_issue: Optional[Union[str, datetime]] = None,
    start_date: Optional[Union[str, datetime]] = None,
    end_date: Optional[Union[str, datetime]] = None,
    humanize_parameters: bool = False,
    tidy_data: bool = True,
) -> None:
    """
    :param station_ids: station ids which are being queried from the MOSMIX
        forecast
    :param mosmix_type: type of forecast, either small (MOSMIX-S) or large
        (MOSMIX-L), as string or enumeration
    :param parameters: optional parameters for which the forecasts are filtered
    :param start_issue: start date of the MOSMIX forecast, can be used in
        combination with end_issue to query multiple MOSMIX forecasts, or
        instead used with enumeration to only query LATEST MOSMIX forecast
    :param end_issue: end issue of MOSMIX forecast, can be used to query
        multiple MOSMIX forecasts available on the server
    :param start_date: start date to limit the returned data to specified datetimes
    :param end_date: end date to limit the returned data to specified datetimes
    :param humanize_parameters: boolean if parameters shall be renamed to human
        readable names
    :param tidy_data: boolean if pandas.DataFrame shall be tidied and values put
        in rows
    """
    # Use all parameters if none are given
    parameters = parameters or [*self._parameter_base]

    super(DWDMosmixData, self).__init__(
        station_ids=station_ids,
        parameters=parameters,
        start_date=start_date,
        end_date=end_date,
        humanize_parameters=humanize_parameters,
    )

    self.mosmix_type = parse_enumeration_from_template(mosmix_type, DWDMosmixType)

    # Parse issue date if not set to fixed "latest" string
    if start_issue is DWDForecastDate.LATEST and end_issue:
        log.info(
            "end_issue will be ignored as 'latest' was selected for issue date"
        )

    if start_issue is not DWDForecastDate.LATEST:
        if not start_issue and not end_issue:
            start_issue = DWDForecastDate.LATEST
        elif not end_issue:
            end_issue = start_issue
        elif not start_issue:
            start_issue = end_issue

        start_issue = pd.to_datetime(start_issue, infer_datetime_format=True).floor(
            "1H"
        )
        end_issue = pd.to_datetime(end_issue, infer_datetime_format=True).floor(
            "1H"
        )

        # Shift start date and end date to 3, 9, 15, 21 hour format.
        # BUGFIX: compare against the parsed enum member rather than the raw
        # mosmix_type argument, which may still be a plain string ("large")
        # and would never equal the enum, silently skipping the adjustment.
        if self.mosmix_type == DWDMosmixType.LARGE:
            start_issue = self.adjust_datetime(start_issue)
            end_issue = self.adjust_datetime(end_issue)

    self.start_issue = start_issue
    self.end_issue = end_issue
    self.humanize_parameters = humanize_parameters
    self.tidy_data = tidy_data

    # TODO: this should be replaced by the freq property in the main class
    if self.mosmix_type == DWDMosmixType.SMALL:
        self.freq = "1H"  # short forecasts released every hour
    else:
        self.freq = "6H"

    self.kml = KMLReader(station_ids=self.station_ids, parameters=self.parameters)
def __init__(
    self,
    station_ids: List[Union[int, str]],
    parameters: List[Union[str, DWDObservationParameter, DWDObservationParameterSet]],
    resolution: Union[str, DWDObservationResolution],
    periods: Optional[List[Union[str, DWDObservationPeriod]]] = None,
    start_date: Optional[Union[str, Timestamp, datetime]] = None,
    end_date: Optional[Union[str, Timestamp, datetime]] = None,
    tidy_data: bool = True,
    humanize_parameters: bool = False,
) -> None:
    """
    Class with mostly flexible arguments to define a request regarding DWD data.
    Special handling for period type. If start_date/end_date are given all period
    types are considered and merged together and the data is filtered for the given
    dates afterwards.

    :param station_ids: definition of stations by str, int or list of str/int,
        will be parsed to list of int
    :param parameters: Observation measure
    :param resolution: Frequency/granularity of measurement interval
    :param periods: Recent or historical files (optional), if None and
        start_date and end_date None, all period types are used
    :param start_date: Replacement for period type to define exact time of
        requested data, if used, period type will be set to all period types
        (hist, recent, now)
    :param end_date: Replacement for period type to define exact time of
        requested data, if used, period type will be set to all period types
        (hist, recent, now)
    :param tidy_data: Reshape DataFrame to a more tidy and row-based version of
        data
    :param humanize_parameters: Replace column names by more meaningful ones
    """
    # DWD station ids are 5-digit, zero-padded strings
    station_ids = pd.Series(station_ids).astype(str).str.pad(5, "left", "0")

    if not station_ids.str.isdigit().all():
        raise ValueError("station identifiers of DWD only contain digits")

    # Required before super call as parameter parsing for dwd requires resolution
    self.resolution = parse_enumeration_from_template(
        resolution, DWDObservationResolution)

    super().__init__(
        station_ids=station_ids,
        parameters=parameters,
        start_date=start_date,
        end_date=end_date,
        humanize_parameters=humanize_parameters,
    )

    # If any date is given, use all period types and filter, else if not period type
    # is given use all period types
    if not periods:
        if self.start_date:
            periods = self._get_periods()
        else:
            periods = [*DWDObservationPeriod]
    else:
        # For the case that a period_type is given, parse the period type(s)
        periods = (
            pd.Series(periods)
            .apply(parse_enumeration_from_template, args=(DWDObservationPeriod, ))
            .sort_values()
            .tolist()
        )

        if start_date or end_date:
            log.warning(
                f"start_date and end_date filtering limited to defined "
                f"periods {periods}")

    # For requests with start date and end date set in the future, we wont expect
    # any periods to be selected
    if not periods:
        log.warning(
            "start date and end date are out of range of any period.")

    self.periods = periods

    # If more then one parameter requested, automatically tidy data; a generator
    # is used instead of a materialized list so any() can short-circuit.
    self.tidy_data = (
        tidy_data
        or len(self.parameters) > 1
        or any(
            not isinstance(parameter, DWDObservationParameterSet)
            for parameter, parameter_set in self.parameters
        )
    )
def _parse_parameter(self, parameter: List[Union[str, Enum]]) -> List[Enum]: """ Method to parse parameters, either from string or enum. Case independent for strings. :param parameter: parameters as strings or enumerations :return: list of parameter enumerations of type self._parameter_base """ # TODO: refactor this! # for logging enums = [] if self._dataset_base: enums.append(self._dataset_base) enums.append(self._parameter_base) parameters = [] for parameter in pd.Series(parameter): parameter_ = None if self._dataset_base: try: parameter_ = parse_enumeration_from_template( parameter, self._dataset_base ) except InvalidEnumeration: pass else: parameters.append((parameter_, parameter_)) continue try: parameter_ = parse_enumeration_from_template( parameter, self._parameter_base[self._dataset_accessor] ) if self._unique_dataset: dataset = self._dataset_base[self._dataset_accessor] else: dataset = self._parameter_to_dataset_mapping[self.resolution][ parameter_ ] parameter_ = self._dataset_tree[self._dataset_accessor][ dataset.name ][parameter_.name] except InvalidEnumeration: pass else: parameters.append((parameter_, dataset)) continue try: parameter_ = parse_enumeration_from_template( parameter, self._parameter_base[self._dataset_accessor] ) parameters.append(parameter_) except InvalidEnumeration: pass if not parameter_: log.info(f"parameter {parameter} could not be parsed from ({enums})") return parameters
def _build_complete_df(
    self, df: pd.DataFrame, station_id: str, parameter: Enum
) -> pd.DataFrame:
    """
    Left-join the station data onto the full requested date range (self._base_df)
    so that every expected timestamp appears, filling gaps with NaN.

    :param df: data of one station as returned by the service
    :param station_id: id of the station the data belongs to
    :param parameter: requested parameter enum; when the service has datasets
        this is a (parameter, dataset) tuple
    :return: dataframe reindexed on the complete date range
    """
    # For cases where requests are not defined by start and end date but rather by
    # periods, use the returned df without modifications
    # We may put a standard date range here if no data is found
    if not self.stations.start_date:
        return df

    dataset = None
    if self.stations.stations._has_datasets:
        # unpack the (parameter, dataset) tuple
        parameter, dataset = parameter

    if parameter != dataset or not self.stations.stations.tidy:
        df = pd.merge(
            left=self._base_df,
            right=df,
            left_on=Columns.DATE.value,
            right_on=Columns.DATE.value,
            how="left",
        )

        df[Columns.STATION_ID.value] = station_id

        if self.stations.tidy:
            df[Columns.PARAMETER.value] = parameter.value
            df[Columns.PARAMETER.value] = pd.Categorical(
                df[Columns.PARAMETER.value]
            )

            if dataset:
                df[Columns.DATASET.value] = dataset.name.lower()
                df[Columns.DATASET.value] = pd.Categorical(
                    df[Columns.DATASET.value]
                )

        return df
    else:
        # Whole-dataset request in tidy mode: rebuild each parameter group
        # against the complete date range. Only reachable when datasets exist,
        # so `dataset` is not None here — TODO confirm for _unique_dataset case.
        data = []

        for parameter_name, group in df.groupby(Columns.PARAMETER.value, sort=False):
            if self.stations.stations._unique_dataset:
                parameter_ = parse_enumeration_from_template(
                    parameter_name,
                    self.stations.stations._parameter_base[
                        self.stations.resolution.name
                    ],
                )
            else:
                parameter_ = parse_enumeration_from_template(
                    parameter_name,
                    self.stations.stations._dataset_tree[
                        self.stations.resolution.name
                    ][dataset.name],
                )

            # Use a fresh name instead of rebinding `df`: the original code
            # clobbered the frame being grouped, which only worked because the
            # groupby object keeps its own reference.
            merged = pd.merge(
                left=self._base_df,
                right=group,
                left_on=Columns.DATE.value,
                right_on=Columns.DATE.value,
                how="left",
            )
            merged[Columns.STATION_ID.value] = station_id
            merged[Columns.PARAMETER.value] = parameter_.value
            merged[Columns.DATASET.value] = dataset.name.lower()
            merged[Columns.DATASET.value] = pd.Categorical(merged[Columns.DATASET.value])

            data.append(merged)

        return pd.concat(data)