def test_meta_index_creation_failure():
    """
    Requesting the meta index for the climate summary dataset at 1-minute
    resolution (historical period) is expected to raise FileNotFoundError.
    """
    failing_request = (
        DwdObservationDataset.CLIMATE_SUMMARY,
        Resolution.MINUTE_1,
        Period.HISTORICAL,
    )

    with pytest.raises(FileNotFoundError):
        create_meta_index_for_climate_observations(*failing_request)
def test_meta_index_creation():
    """
    Check meta index creation for one working and one failing request.

    The daily climate summary request must yield a non-empty meta index,
    while the 1-minute climate summary request must raise an HTTPError.
    """
    # Existing combination of parameters
    working_request = (
        DwdObservationDataset.CLIMATE_SUMMARY,
        Resolution.DAILY,
        Period.HISTORICAL,
    )
    meta_index = create_meta_index_for_climate_observations(*working_request)
    assert not meta_index.empty

    # NOTE(review): test_meta_index_creation_failure expects FileNotFoundError
    # for this very same call - confirm which exception is actually raised.
    failing_request = (
        DwdObservationDataset.CLIMATE_SUMMARY,
        Resolution.MINUTE_1,
        Period.HISTORICAL,
    )
    with pytest.raises(requests.exceptions.HTTPError):
        create_meta_index_for_climate_observations(*failing_request)
def test_meta_index_creation_success():
    """
    The meta index for the daily, historical climate summary dataset must
    not be empty.
    """
    # Existing combination of parameters
    working_request = (
        DwdObservationDataset.CLIMATE_SUMMARY,
        Resolution.DAILY,
        Period.HISTORICAL,
    )

    meta_index = create_meta_index_for_climate_observations(*working_request)

    assert not meta_index.empty
def _all(self) -> pd.DataFrame:
    """
    Assemble one station listing across all requested datasets and periods.

    For every dataset referenced by ``self.parameter`` and every period in
    ``self.period`` (walked in reverse order), the meta index is loaded and
    restricted to stations that also occur in the file index. Invalid
    dataset/resolution/period combinations are logged and skipped.

    :return: stations with duplicate station ids dropped (first occurrence
        kept), sorted by the integer value of the station id; an empty
        DataFrame if nothing could be collected
    """
    dataset_names = pd.Series(self.parameter).map(lambda x: x[1]).unique()

    collected = []
    for dataset in dataset_names:
        # First "now" period as it has more updated end date up to the last
        # "now" values
        for period in reversed(self.period):
            is_valid = check_dwd_observations_dataset(
                dataset, self.resolution, period
            )

            if is_valid:
                meta_index = create_meta_index_for_climate_observations(
                    dataset, self.resolution, period
                )
                file_index = create_file_index_for_climate_observations(
                    dataset, self.resolution, period
                )

                # Keep only stations for which files are listed.
                has_file = meta_index.loc[:, Columns.STATION_ID.value].isin(
                    file_index[Columns.STATION_ID.value]
                )
                collected.append(meta_index[has_file])
            else:
                log.warning(
                    f"The combination of {dataset.value}, "
                    f"{self.resolution.value}, {period.value} is invalid."
                )

    try:
        combined = pd.concat(collected)
    except ValueError:
        # pd.concat refuses an empty list of frames.
        return pd.DataFrame()

    combined = combined.drop_duplicates(
        subset=Columns.STATION_ID.value, keep="first"
    )

    if combined.empty:
        return combined

    return combined.sort_values(
        [Columns.STATION_ID.value], key=lambda x: x.astype(int)
    )
def test_meta_index_1mph_creation():
    """
    The 1-minute historical precipitation meta index must contain exactly
    the expected single row for station "00003".
    """
    meta_index_1mph = create_meta_index_for_climate_observations(
        DwdObservationDataset.PRECIPITATION,
        Resolution.MINUTE_1,
        Period.HISTORICAL,
    )

    is_station = meta_index_1mph[Columns.STATION_ID.value] == "00003"
    station_rows = meta_index_1mph.loc[is_station, :].values.tolist()

    expected_rows = [
        [
            "00003",
            "18910101",
            "20120406",
            "202.00",
            "50.7827",
            "6.0941",
            "Aachen",
            "Nordrhein-Westfalen",
        ]
    ]

    assert station_rows == expected_rows
def _all(self) -> pd.DataFrame:
    """
    Assemble one station listing across all requested datasets and periods.

    For every dataset referenced by ``self.parameter`` and every period in
    ``self.period`` (walked in reverse order), the meta index is loaded and
    restricted to stations that also occur in the file index. Stations whose
    id was already collected from an earlier dataset/period are skipped, so
    the first occurrence wins. Invalid dataset/resolution/period combinations
    are logged and skipped.

    The per-period frames are gathered in a list and concatenated once;
    ``DataFrame.append`` is not used because it was removed in pandas 2.0.

    :return: collected stations sorted by the integer value of the station
        id; an empty DataFrame if nothing could be collected
    """
    datasets = pd.Series(self.parameter).map(lambda x: x[1]).unique()

    frames = []
    seen_station_ids = set()

    for dataset in datasets:
        # First "now" period as it has more updated end date up to the last
        # "now" values
        for period in reversed(self.period):
            if not check_dwd_observations_dataset(dataset, self.resolution, period):
                log.warning(
                    f"The combination of {dataset.value}, "
                    f"{self.resolution.value}, {period.value} is invalid."
                )
                continue

            df = create_meta_index_for_climate_observations(
                dataset, self.resolution, period
            )
            file_index = create_file_index_for_climate_observations(
                dataset, self.resolution, period
            )

            # Keep only stations for which files are listed.
            df = df[
                df.loc[:, Columns.STATION_ID.value].isin(
                    file_index[Columns.STATION_ID.value]
                )
            ]

            # Drop stations already taken from an earlier dataset/period.
            if seen_station_ids:
                df = df[~df[Columns.STATION_ID.value].isin(seen_station_ids)]

            seen_station_ids.update(df[Columns.STATION_ID.value])
            frames.append(df)

    if not frames:
        return pd.DataFrame()

    stations_df = pd.concat(frames)

    if not stations_df.empty:
        stations_df = stations_df.sort_values(
            [Columns.STATION_ID.value], key=lambda x: x.astype(int)
        )

    return stations_df