import os
import typing as T

import fsspec
import pyarrow as pa
import xarray as xr
import zarr


def read_feather(path: str, fs: T.Optional[fsspec.AbstractFileSystem] = None):
    """Read an Arrow IPC stream at ``path`` into a pandas DataFrame.

    Returns None if the file is missing or cannot be parsed as an Arrow stream.
    """
    fs = fs or fsspec.filesystem("file")
    if not fs.exists(path):
        return
    try:
        with fs.open(path) as f:
            reader = pa.ipc.open_stream(f)
            return reader.read_pandas()
    except (pa.ArrowInvalid, FileNotFoundError):
        return
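

# Usage sketch for read_feather. The paths and the "gs" filesystem below are
# hypothetical; they only illustrate the two call styles (default local
# filesystem vs. an explicit fsspec filesystem).
def _example_read_feather() -> None:
    local_df = read_feather("cache/metrics.feather")  # local filesystem by default
    gcs = fsspec.filesystem("gs")  # assumes gcsfs is installed
    remote_df = read_feather("my-bucket/metrics.feather", fs=gcs)
    # Both calls return None when the file is absent or not a valid Arrow stream.
    print(local_df, remote_df)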


def _get_merged_time_coordinate(
    source: str, target: str, dim: str, fs: fsspec.AbstractFileSystem
) -> T.Optional[xr.DataArray]:
    """Return the time coordinate the target store will have after the append.

    If the target store already exists, its ``dim`` coordinate is concatenated
    with the source's; otherwise the source's coordinate is returned as-is.
    Returns None when ``dim`` is not a coordinate of the source dataset.
    """
    source_ds = xr.open_zarr(source, consolidated=True)
    if dim in source_ds.coords:
        if fs.exists(target):
            target_ds = xr.open_zarr(fs.get_mapper(target), consolidated=True)
            time = xr.concat([target_ds[dim], source_ds[dim]], dim=dim)
        else:
            time = source_ds[dim]
        return time
    return None
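

# Sketch of how _get_merged_time_coordinate behaves: appending a 3-step segment
# to an existing 5-step target yields an 8-entry coordinate. The store paths
# are hypothetical and must point at real zarr stores for this to run.
def _example_merged_time() -> None:
    fs = fsspec.filesystem("file")
    merged = _get_merged_time_coordinate("segment.zarr", "run.zarr", "time", fs)
    if merged is not None:
        print(merged.sizes["time"])  # e.g. 8 == 5 existing + 3 appended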


def find_available_groups(
    product_files: T.Dict[str, T.Tuple[str, str, str, str, str]],
    product_path: str,
    check_files_exist: bool = False,
    fs: fsspec.AbstractFileSystem = fsspec.filesystem("file"),
) -> T.Dict[str, T.List[str]]:
    """Map Sentinel-1 dataset group names to the product files that define them."""
    groups: T.Dict[str, T.List[str]] = {}
    for path, (product_type, _, swath, polarization, _) in product_files.items():
        swath_pol_group = f"{swath}/{polarization}".upper()
        abspath = os.path.join(product_path, os.path.normpath(path))
        if check_files_exist and not fs.exists(abspath):
            continue
        if product_type == "s1Level1ProductSchema":
            # The annotation file defines the swath group, the swath/polarization
            # group, and all of its metadata subgroups.
            groups[swath.upper()] = [""]
            groups[swath_pol_group] = [abspath] + groups.get(swath_pol_group, [])
            for metadata_group in [
                "orbit",
                "attitude",
                "azimuth_fm_rate",
                "dc_estimate",
                "gcp",
                "coordinate_conversion",
            ]:
                groups[f"{swath_pol_group}/{metadata_group}"] = [abspath]
        elif product_type == "s1Level1CalibrationSchema":
            groups[f"{swath_pol_group}/calibration"] = [abspath]
        elif product_type == "s1Level1NoiseSchema":
            groups[f"{swath_pol_group}/noise_range"] = [abspath]
            groups[f"{swath_pol_group}/noise_azimuth"] = [abspath]
        elif product_type == "s1Level1MeasurementSchema":
            groups[swath_pol_group] = [abspath] + groups.get(swath_pol_group, [])
    return groups
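

# Hypothetical illustration of find_available_groups: one annotation file and
# one measurement file for the IW1/VV pair. The 5-tuples mirror the
# (product_type, _, swath, polarization, _) layout unpacked above; the
# filenames are made up.
def _example_find_groups() -> None:
    product_files = {
        "annotation/iw1-vv.xml": ("s1Level1ProductSchema", "", "iw1", "vv", ""),
        "measurement/iw1-vv.tiff": ("s1Level1MeasurementSchema", "", "iw1", "vv", ""),
    }
    groups = find_available_groups(product_files, "/data/S1A_PRODUCT.SAFE")
    # Expect keys such as "IW1", "IW1/VV", "IW1/VV/orbit", ..., each mapping to
    # the absolute paths of the files that back that group.
    print(sorted(groups))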


def append_zarr_along_time(
    source_path: str, target_path: str, fs: fsspec.AbstractFileSystem, dim: str = "time"
):
    """Append local zarr store at source_path to zarr store at target_path along time.

    Args:
        source_path: Local path to zarr store that represents an xarray dataset.
        target_path: Local or remote URL for zarr store to be appended to.
        fs: Filesystem for target_path.
        dim: (optional) name of time dimension. Defaults to "time".

    Raises:
        ValueError: If the chunk size in time does not evenly divide the length
            of the time dimension for the zarr store at source_path.

    Warning:
        The zarr store at source_path will be modified in place.
    """
    merged_time = _get_merged_time_coordinate(source_path, target_path, dim, fs)
    if fs.exists(target_path):
        # Shift the source's time indices past the end of the existing target
        # so that the uploaded chunks land after the data already there.
        source_store = zarr.open(source_path, mode="r+")
        target_store = zarr.open_consolidated(fsspec.get_mapper(target_path))
        _assert_chunks_match(source_store, target_store, dim)
        _set_time_units_like(source_store, target_store)
        _shift_store(source_store, dim, _get_dim_size(target_store, dim))
    elif fs.protocol == "file":
        os.makedirs(target_path)

    upload_dir(source_path, target_path)
    _overwrite_time_array_with_single_chunk(target_path, merged_time, dim)

    _, _, absolute_target_paths = fsspec.get_fs_token_paths(target_path)
    consolidate_metadata(fs, absolute_target_paths[0])
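

# End-to-end sketch of the append flow: write a segment locally, then append it
# to a target store. All paths are hypothetical, and per the Warning above the
# local segment is modified in place.
def _example_append(segment: xr.Dataset) -> None:
    fs = fsspec.filesystem("file")
    segment.to_zarr("segment.zarr", consolidated=True)
    append_zarr_along_time("segment.zarr", "target.zarr", fs)
    # Later segments can be appended the same way; each segment's chunk size
    # along "time" must match the target's (see the ValueError above).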