def _parse_geo_metadata(metadata_file_and_station_id: Tuple[BytesIO, str]) -> pd.DataFrame:
    """Extract the geography metadata of one station from a 1-minute metadata zip.

    The archive is opened, the station's ``Metadaten_Geographie_<id>.txt``
    member is parsed and its columns are normalised, so that the result looks
    like the metadata files that already exist ready-made for other parameter
    combinations.

    Args:
        metadata_file_and_station_id (BytesIO, str) - the zipped metadata file
            (as bytes buffer) together with the id of the station it belongs to.

    Return:
        A pandas DataFrame holding a single row (the last one of the geography
        file) reindexed to METADATA_COLUMNS, with the from-date taken from the
        file's first row.
    """
    metadata_file, station_id = metadata_file_and_station_id

    archive = ZipFileSystem(metadata_file, mode="r")
    raw_geography = archive.open(f"Metadaten_Geographie_{station_id}.txt").read()

    geography = _parse_zipped_data_into_df(raw_geography)
    # Lower-case the headers first so the German -> English mapping can match.
    geography = geography.rename(columns=str.lower)
    geography = geography.rename(columns=GERMAN_TO_ENGLISH_COLUMNS_MAPPING)

    from_date_column = Columns.FROM_DATE.value
    # Broadcast the first row's from-date to every row, so that the last row
    # (the only one kept below) carries the earliest from-date of the file.
    geography[from_date_column] = geography.loc[0, from_date_column]

    last_row_only = geography.iloc[[-1], :]
    return last_row_only.reindex(columns=METADATA_COLUMNS)
def fetch(self, url) -> bytes:
    """
    Fetch weather mosmix file (zipped xml).

    The archive behind ``url`` is downloaded via ``self.download`` and the
    content of the first member matched by ``glob("*")`` is returned as bytes.
    """
    archive = ZipFileSystem(self.download(url), "r")
    first_member = archive.glob("*")[0]
    return archive.open(first_member).read()
def describe_all_csvs_in_zips(fs):
    """Print ``describe()`` statistics for every CSV found in each zip of ``fs``.

    For each name produced by ``get_zips(fs)`` the name is printed, the archive
    at ``/tmp/dl.zip`` is opened and every member ending in ``.csv`` is loaded
    with pandas and summarised on stdout.

    Args:
        fs: filesystem object handed through to ``get_zips``.
    """
    for zip_name in get_zips(fs):
        print(zip_name)
        # NOTE(review): the archive path is fixed; presumably get_zips() places
        # the current zip at /tmp/dl.zip before yielding - confirm.
        my_zip = ZipFileSystem("/tmp/dl.zip")
        for fname in my_zip.find(""):
            if not fname.endswith(".csv"):
                continue
            print(fname)
            # Close the member handle as soon as pandas has consumed it
            # instead of leaking one open file per CSV.
            with my_zip.open(fname) as csv_file:
                df = pd.read_csv(csv_file)
            print(df.describe())
def test_open_zip_fs_regular_filename(self):
    """Check that an FSFile on a zip filesystem opens when given a regular filename."""
    from fsspec.implementations.zip import ZipFileSystem
    from satpy.readers import FSFile

    archive_fs = ZipFileSystem(self.zip_name)
    member_path = _posixify_path(self.local_filename2)
    fs_file = FSFile(member_path, archive_fs)

    opened = fs_file.open()
    _assert_is_open_file_and_close(opened)
def __download_climate_observations_data(remote_file: str) -> bytes:
    """Download a station archive and return the bytes of its 'produkt' file.

    Args:
        remote_file: location of the zipped station data to download.

    Returns:
        The raw content of the single archive member matching ``produkt*``.

    Raises:
        InvalidURL: if the download reports the URL as invalid.
        FailedDownload: if the download fails for any other reason.
        BadZipFile: if the downloaded payload cannot be opened as a zip archive.
        ProductFileNotFound: if the archive does not contain exactly one
            member matching ``produkt*``.
    """
    try:
        file = download_file(remote_file, ttl=CacheExpiry.FIVE_MINUTES)
    except InvalidURL as e:
        raise InvalidURL(f"Error: the station data {remote_file} could not be reached.") from e
    except Exception as e:
        # Chain the original error explicitly, like the other handlers do, so
        # the root cause of the failed download stays visible in the traceback.
        raise FailedDownload(f"Download failed for {remote_file}") from e

    try:
        zfs = ZipFileSystem(file)
    except BadZipFile as e:
        raise BadZipFile(f"The archive of {remote_file} seems to be corrupted.") from e

    product_file = zfs.glob("produkt*")

    # Zero matches as well as several matches are rejected here.
    if len(product_file) != 1:
        raise ProductFileNotFound(f"The archive of {remote_file} does not hold a 'produkt' file.")

    return zfs.open(product_file[0]).read()
def test_equality(self):
    """Check the equality semantics of FSFile for names and filesystems."""
    from fsspec.implementations.zip import ZipFileSystem
    from satpy.readers import FSFile

    archive_fs = ZipFileSystem(self.zip_name)
    first_name = self.local_filename
    second_name = self.local_filename2

    # same name, no filesystem on either side
    assert FSFile(first_name) == FSFile(first_name)
    # same name, same filesystem
    assert FSFile(first_name, archive_fs) == FSFile(first_name, archive_fs)
    # same name, but only one side carries a filesystem
    assert FSFile(first_name, archive_fs) != FSFile(first_name)
    # different names
    assert FSFile(first_name) != FSFile(second_name)
def test_sorting_fsfiles(self):
    """Check that FSFiles sort together with plain paths by their filesystem path."""
    from fsspec.implementations.zip import ZipFileSystem
    from satpy.readers import FSFile

    archive_fs = ZipFileSystem(self.zip_name)
    zipped_file = FSFile(self.local_filename2, archive_fs)
    plain_file = FSFile(self.local_filename)
    bare_path = os.path.normpath('/somedir/bla')

    # Sort the mixed FSFile/str list first, then turn every entry into a path.
    mixed_entries = [zipped_file, plain_file, bare_path]
    sorted_filenames = list(map(os.fspath, sorted(mixed_entries)))

    # Reference: turn every entry into a path first, then sort the strings.
    expected_filenames = sorted([bare_path, os.fspath(zipped_file), os.fspath(plain_file)])

    assert sorted_filenames == expected_filenames
def test_hash(self):
    """Check that every filename/filesystem combination of FSFile hashes differently."""
    from fsspec.implementations.cached import CachingFileSystem
    from fsspec.implementations.local import LocalFileSystem
    from fsspec.implementations.zip import ZipFileSystem
    from satpy.readers import FSFile

    local_fs = LocalFileSystem()
    archive_fs = ZipFileSystem(self.zip_name)
    caching_fs = CachingFileSystem(fs=local_fs)

    filenames = {self.local_filename, self.local_filename2}
    filesystems = [None, local_fs, archive_fs, caching_fs]

    # make sure each name/fs-combi has its own hash
    distinct_hashes = set()
    for filename in filenames:
        for filesystem in filesystems:
            distinct_hashes.add(hash(FSFile(filename, filesystem)))

    assert len(distinct_hashes) == 2*4
def file_system(self, request, tmp_path):
    """Prepare filesystem.

    Prepare a filesystem to test if the filesystem spec works with it.

    Args:
        request: object whose ``param`` attribute selects the filesystem
            flavour (``"zip"`` or ``"local"``).
        tmp_path: directory (path object) in which the zip archive is created.

    Returns:
        A ``ZipFileSystem`` over a freshly built archive of ``self.refdir``
        for ``"zip"``, a ``LocalFileSystem`` for ``"local"``, and ``None``
        for any other parameter value.
    """
    if request.param == "zip":
        # Import the module itself: the call below is ``shutil.make_archive``,
        # so importing only the bare function would leave ``shutil`` unbound
        # unless the surrounding module happens to import it.
        import shutil

        from fsspec.implementations.zip import ZipFileSystem

        # prepare archive to test on
        archive = tmp_path / "test"
        shutil.make_archive(archive, "zip", os.curdir, self.refdir)
        return ZipFileSystem(archive.with_suffix(".zip"))
    elif request.param == "local":
        from fsspec.implementations.local import LocalFileSystem

        return LocalFileSystem()
# Demo script: several ways of reading the archive at /tmp/dl.zip.

# Summarise every CSV of every zip reported for ``fs``.
describe_all_csvs_in_zips(fs)

# 1) Open the archive as a plain file through fsspec and let the standard
#    library's zipfile module list its members.
dlf = fsspec.open("/tmp/dl.zip")
with dlf as f:
    zipf = zipfile.ZipFile(f)
    print(zipf.infolist())
# NOTE(review): the ``with`` block above has already left ``dlf``; this extra
# close looks redundant - confirm before removing.
dlf.close()

# 2) Address a single member with a chained URL ("zip://<member>::<archive>")
#    and read it in text mode.
d1f = fsspec.open("zip://dummy1.csv::/tmp/dl.zip", "rt")
with d1f as f:
    print(f.read())

# Disabled variant: same chained URL, with the archive coming from GitHub.
#d1f = fsspec.open("zip://dummy1.csv::github://tiagoantao:python-performance@/08-persistence/sec1-fsspec/dummy.zip")
#with d1f as f:
#    print(pd.read_csv(f))

# 3) Wrap the zip filesystem in a PyFileSystem/FSSpecHandler pair and read the
#    CSV through its input stream.
# NOTE(review): PyFileSystem, FSSpecHandler and ``csv`` are presumably the
# pyarrow ones (pyarrow.fs / pyarrow.csv) - confirm against the imports.
zfs = ZipFileSystem("/tmp/dl.zip")
arrow_fs = PyFileSystem(FSSpecHandler(zfs))
my_csv = csv.read_csv(arrow_fs.open_input_stream("dummy1.csv"))
print(my_csv)

# Disabled experiments kept for reference.
#with fsspec.open("zip:local.zip/dummy1.csv") as f:
#    pd.read_csv(f)
##fsa = fsspec.get_mapper("github://*****:*****@")
##print(fsa)
## fs = fsspec.open("git_https.py")
## with fs as f:
##    print(f)