def _parse_geo_metadata(metadata_file_and_station_id: Tuple[BytesIO, str]) -> pd.DataFrame:
    """Extract the geography metadata of one station from a zipped metadata file.

    Reads ``Metadaten_Geographie_<station_id>.txt`` from the archive, parses it
    into a DataFrame, lower-cases and translates the column names, and reduces
    the result to a single row laid out like ``METADATA_COLUMNS``.

    Args:
        metadata_file_and_station_id: Two-element tuple of the zipped metadata
            file (as an in-memory bytes buffer) and the station id that the
            archive belongs to.

    Returns:
        A single-row pandas DataFrame with the columns given by
        ``METADATA_COLUMNS``.
    """
    metadata_file, station_id = metadata_file_and_station_id
    metadata_geo_filename = f"Metadaten_Geographie_{station_id}.txt"

    zfs = ZipFileSystem(metadata_file, mode="r")
    # Release the archive member as soon as its content has been read instead
    # of leaving the handle open until garbage collection.
    with zfs.open(metadata_geo_filename) as geo_file:
        file = geo_file.read()

    df = _parse_zipped_data_into_df(file)
    df = df.rename(columns=str.lower).rename(columns=GERMAN_TO_ENGLISH_COLUMNS_MAPPING)

    # Only the last row is kept, but it receives the from-date of the first
    # row (label 0).
    # NOTE(review): presumably so the remaining record covers the station's
    # whole history - confirm.
    df[Columns.FROM_DATE.value] = df.loc[0, Columns.FROM_DATE.value]
    df = df.iloc[[-1], :]

    return df.reindex(columns=METADATA_COLUMNS)
def fetch(self, url) -> bytes:
    """
    Fetch weather mosmix file (zipped xml).

    Downloads the archive at ``url`` via ``self.download`` and returns the
    decompressed content of the first entry reported by ``glob("*")``.

    :param url: location of the zipped file to download
    :return: raw bytes of the first archive member
    :raises IndexError: if the downloaded archive contains no entries
    """
    buffer = self.download(url)
    zfs = ZipFileSystem(buffer, "r")

    members = zfs.glob("*")
    if not members:
        # Same exception type as the former bare ``[0]`` lookup, but with a
        # message that tells the caller which download was empty.
        raise IndexError(f"Archive downloaded from {url} contains no files")

    # Close the member handle deterministically once it has been read.
    with zfs.open(members[0]) as member:
        return member.read()
def describe_all_csvs_in_zips(fs):
    """Print summary statistics for every CSV file inside a zip archive.

    For each name yielded by ``get_zips(fs)`` the name is printed, the archive
    at ``/tmp/dl.zip`` is opened, and every member whose name ends in ``.csv``
    is loaded with pandas and summarised with ``DataFrame.describe()``.

    Args:
        fs: Filesystem object handed to ``get_zips``.
    """
    for zip_name in get_zips(fs):
        print(zip_name)

        # NOTE(review): the archive path is hard-coded and does not depend on
        # ``zip_name``; presumably ``get_zips`` stages each archive at this
        # location - confirm against its implementation.
        my_zip = ZipFileSystem("/tmp/dl.zip")

        for fname in my_zip.find(""):
            if not fname.endswith(".csv"):
                continue
            print(fname)
            # Close the member handle once pandas has consumed it.
            with my_zip.open(fname) as csv_file:
                df = pd.read_csv(csv_file)
            print(df.describe())
def __download_climate_observations_data(remote_file: str) -> bytes:
    """Download a station archive and return the content of its 'produkt' file.

    Args:
        remote_file: Location of the zipped station data to download.

    Returns:
        The raw bytes of the single archive member matching ``produkt*``.

    Raises:
        InvalidURL: If the download reports an invalid URL.
        FailedDownload: If the download fails for any other reason.
        BadZipFile: If the downloaded data cannot be opened as a zip archive.
        ProductFileNotFound: If the archive does not contain exactly one
            member matching ``produkt*``.
    """
    try:
        file = download_file(remote_file, ttl=CacheExpiry.FIVE_MINUTES)
    except InvalidURL as e:
        raise InvalidURL(f"Error: the station data {remote_file} could not be reached.") from e
    except Exception as e:
        # Chain the original error explicitly, like the other handlers here,
        # so the root cause stays visible in the traceback.
        raise FailedDownload(f"Download failed for {remote_file}") from e

    try:
        zfs = ZipFileSystem(file)
    except BadZipFile as e:
        raise BadZipFile(f"The archive of {remote_file} seems to be corrupted.") from e

    product_file = zfs.glob("produkt*")

    # Zero matches and multiple matches are both rejected with the same error.
    if len(product_file) != 1:
        raise ProductFileNotFound(f"The archive of {remote_file} does not hold a 'produkt' file.")

    # Close the member handle deterministically once it has been read.
    with zfs.open(product_file[0]) as product:
        return product.read()