Example no. 1
from typing import Optional

import fsspec
import pyarrow as pa

def read_feather(path: str, fs: Optional[fsspec.AbstractFileSystem] = None):
    """Read an Arrow IPC stream at path into a pandas DataFrame,
    or return None if the file is missing or invalid."""
    fs = fs or fsspec.filesystem("file")  # default to the local filesystem
    if not fs.exists(path):
        return None
    try:
        with fs.open(path) as f:
            reader = pa.ipc.open_stream(f)
            return reader.read_pandas()
    except (pa.ArrowInvalid, FileNotFoundError):
        # Corrupt or partially written file, or it vanished after the
        # existence check: treat it as missing.
        return None
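
A minimal usage sketch, assuming a local file written with pyarrow's IPC stream writer (the file names are illustrative):

import pandas as pd
import pyarrow as pa

# Write a small Arrow IPC stream so read_feather has something to load.
df = pd.DataFrame({"x": [1, 2, 3]})
table = pa.Table.from_pandas(df)
with pa.OSFile("example.arrow", "wb") as sink:
    with pa.ipc.new_stream(sink, table.schema) as writer:
        writer.write_table(table)

print(read_feather("example.arrow"))   # DataFrame with column "x"
print(read_feather("missing.arrow"))   # None: file does not exist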
Example no. 2
from typing import Optional

import fsspec
import xarray as xr

def _get_merged_time_coordinate(source: str, target: str, dim: str,
                                fs: fsspec.AbstractFileSystem
                                ) -> Optional[xr.DataArray]:
    """Return the target's times followed by the source's, or None
    if dim is not a coordinate of the source dataset."""
    source_ds = xr.open_zarr(source, consolidated=True)
    if dim in source_ds.coords:
        if fs.exists(target):
            target_ds = xr.open_zarr(fs.get_mapper(target), consolidated=True)
            time = xr.concat([target_ds[dim], source_ds[dim]], dim=dim)
        else:
            time = source_ds[dim]  # no target yet: use the source's times
        return time
    return None
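
A small sketch of the xr.concat behaviour this helper relies on, with in-memory arrays standing in for the two zarr stores:

import numpy as np
import xarray as xr

old = xr.DataArray(np.array(["2020-01-01", "2020-01-02"], dtype="datetime64[ns]"),
                   dims="time", name="time")
new = xr.DataArray(np.array(["2020-01-03"], dtype="datetime64[ns]"),
                   dims="time", name="time")
merged = xr.concat([old, new], dim="time")
print(merged.values)  # the target's times followed by the source's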
Example no. 3
import os
import typing as T

import fsspec

def find_available_groups(
    product_files: T.Dict[str, T.Tuple[str, str, str, str, str]],
    product_path: str,
    check_files_exist: bool = False,
    fs: T.Optional[fsspec.AbstractFileSystem] = None,
) -> T.Dict[str, T.List[str]]:
    """Map group names (e.g. "IW1/VV/orbit") to the product files backing them."""
    fs = fs or fsspec.filesystem("file")  # default to the local filesystem
    groups: T.Dict[str, T.List[str]] = {}
    for path, (product_type, _, swath, polarization, _) in product_files.items():
        swath_pol_group = f"{swath}/{polarization}".upper()
        abspath = os.path.join(product_path, os.path.normpath(path))
        if check_files_exist and not fs.exists(abspath):
            continue
        if product_type == "s1Level1ProductSchema":
            groups[swath.upper()] = [""]
            groups[swath_pol_group] = [abspath] + groups.get(swath_pol_group, [])
            for metadata_group in [
                "orbit",
                "attitude",
                "azimuth_fm_rate",
                "dc_estimate",
                "gcp",
                "coordinate_conversion",
            ]:
                groups[f"{swath_pol_group}/{metadata_group}"] = [abspath]
        elif product_type == "s1Level1CalibrationSchema":
            groups[f"{swath_pol_group}/calibration"] = [abspath]
        elif product_type == "s1Level1NoiseSchema":
            groups[f"{swath_pol_group}/noise_range"] = [abspath]
            groups[f"{swath_pol_group}/noise_azimuth"] = [abspath]
        elif product_type == "s1Level1MeasurementSchema":
            groups[swath_pol_group] = [abspath] + groups.get(swath_pol_group, [])

    return groups
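
A hypothetical call, with a two-file stand-in for a Sentinel-1 SAFE product; the file names and product path mirror the branches above but are illustrative only:

# Hypothetical SAFE layout: one annotation file and one calibration file.
product_files = {
    "annotation/s1a-iw1-slc-vv.xml":
        ("s1Level1ProductSchema", "", "IW1", "VV", ""),
    "annotation/calibration/calibration-s1a-iw1-slc-vv.xml":
        ("s1Level1CalibrationSchema", "", "IW1", "VV", ""),
}
groups = find_available_groups(product_files, "/data/S1A_IW_SLC.SAFE")
print(sorted(groups))
# ['IW1', 'IW1/VV', 'IW1/VV/attitude', ..., 'IW1/VV/calibration', ...]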
Example no. 4
import os

import fsspec
import zarr

def append_zarr_along_time(source_path: str,
                           target_path: str,
                           fs: fsspec.AbstractFileSystem,
                           dim: str = "time"):
    """Append the local zarr store at source_path to the zarr store at
    target_path along the time dimension.

    Args:
        source_path: Local path to a zarr store that represents an xarray dataset.
        target_path: Local or remote URL of the zarr store to be appended to.
        fs: Filesystem for target_path.
        dim: (optional) Name of the time dimension. Defaults to "time".

    Raises:
        ValueError: If the chunk size in time does not evenly divide the length
            of the time dimension for the zarr store at source_path.

    Warning:
        The zarr store at source_path is modified in place.
    """
    merged_time = _get_merged_time_coordinate(source_path, target_path, dim, fs)
    if fs.exists(target_path):
        source_store = zarr.open(source_path, mode="r+")
        target_store = zarr.open_consolidated(fsspec.get_mapper(target_path))
        _assert_chunks_match(source_store, target_store, dim)
        _set_time_units_like(source_store, target_store)
        # Renumber the source's chunks along time so they land after the
        # target's existing chunks when uploaded.
        _shift_store(source_store, dim, _get_dim_size(target_store, dim))
    elif fs.protocol == "file":
        os.makedirs(target_path)

    upload_dir(source_path, target_path)
    # Rewrite the time coordinate as a single chunk so readers see one
    # contiguous coordinate covering both stores.
    _overwrite_time_array_with_single_chunk(target_path, merged_time, dim)

    _, _, absolute_target_paths = fsspec.get_fs_token_paths(target_path)
    consolidate_metadata(fs, absolute_target_paths[0])
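
A hypothetical invocation, assuming the helper functions referenced above (upload_dir, consolidate_metadata, and the underscore-prefixed helpers) are in scope; the paths are illustrative:

import fsspec

# Append a newly produced day of data to a running archive on local disk.
fs = fsspec.filesystem("file")
append_zarr_along_time("/tmp/new_day.zarr", "/archive/timeseries.zarr", fs)

# The same call works against remote stores, e.g. fsspec.filesystem("gs")
# with a "gs://bucket/timeseries.zarr" target, provided gcsfs is installed.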