Python AbstractFileSystem.glob Examples

Programming Language: Python

Namespace/Package Name: fsspec

Method/Function: glob

Examples at hotexamples.com: 2

Python AbstractFileSystem.glob - 2 examples found. These are the top rated real world Python examples of fsspec.AbstractFileSystem.glob extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

__init__(9)

exists(4)

created(2)

glob(2)

ls(2)

mkdir(2)

cat_file(1)

delete(1)

get_mapper(1)

isdir(1)

listdir(1)

mkdirs(1)

modified(1)

Example #1

Show file

def detect_folders(
    bucket: str,
    fs: fsspec.AbstractFileSystem,
) -> Mapping[str, DiagnosticFolder]:
    diag_ncs = fs.glob(os.path.join(bucket, "*", "diags.nc"))
    return {
        Path(url).parent.name: DiagnosticFolder(fs,
                                                Path(url).parent.as_posix())
        for url in diag_ncs
    }

Example #2

Show file

def write_parquet(
    fs: fsspec.AbstractFileSystem,
    path: str,
    df: pd.DataFrame,
    partition_cols: Optional[List[str]],
    schema: pa.Schema,
    **kwargs,
):
    """
    Write a single dataframe to parquet.
    """
    # Check partition values are valid before writing to parquet
    mappings = check_partition_columns(df=df, partition_columns=partition_cols)
    df = clean_partition_cols(df=df, mappings=mappings)

    # Dataframe -> pyarrow Table
    table = pa.Table.from_pandas(df, schema=schema)

    if "basename_template" not in kwargs and "ts_init" in df.columns:
        kwargs["basename_template"] = (
            f"{df['ts_init'].min()}-{df['ts_init'].max()}" + "-{i}.parquet"
        )

    # Write the actual file
    partitions = (
        ds.partitioning(
            schema=pa.schema(fields=[table.schema.field(c) for c in partition_cols]),
            flavor="hive",
        )
        if partition_cols
        else None
    )
    if pa.__version__ >= "6.0.0":
        kwargs.update(existing_data_behavior="overwrite_or_ignore")
    files = set(fs.glob(f"{path}/**"))
    ds.write_dataset(
        data=table,
        base_dir=path,
        filesystem=fs,
        partitioning=partitions,
        format="parquet",
        **kwargs,
    )

    # Ensure data written by write_dataset is sorted
    new_files = set(fs.glob(f"{path}/**/*.parquet")) - files
    del df
    for fn in new_files:
        ndf = pd.read_parquet(fs.open(fn))
        # assert ndf.shape[0] == shape
        if "ts_init" in ndf.columns:
            ndf = ndf.sort_values("ts_init").reset_index(drop=True)
        pq.write_table(
            table=pa.Table.from_pandas(ndf),
            where=fn,
            filesystem=fs,
        )

    # Write the ``_common_metadata`` parquet file without row groups statistics
    pq.write_metadata(table.schema, f"{path}/_common_metadata", version="2.6", filesystem=fs)

    # Write out any partition columns we had to modify due to filesystem requirements
    if mappings:
        write_partition_column_mappings(fs=fs, path=path, mappings=mappings)