Example #1
def test_cached_write(protocol):
    d = tempfile.mkdtemp()
    with fsspec.open_files(f"{protocol}::file://{d}/*.out", mode="wb", num=2) as files:
        for f in files:
            f.write(b"data")

    assert sorted(os.listdir(d)) == ["0.out", "1.out"]
Example #2
File: local.py Project: vctmohan/intake
    def _load(self, reload=False):
        """Load text of catalog file and pass to parse

        Will do nothing if auto-reload is off and reload is not explicitly
        requested
        """
        if self.access is False:
            # skip first load, if cat has given name (i.e., is subcat)
            self.updated = 0
            self.access = True
            return
        if self.autoreload or reload:
            # First, we load from YAML, failing if syntax errors are found
            options = self.storage_options or {}
            if hasattr(self.path, 'path') or hasattr(self.path, 'read'):
                file_open = self.path
                self.path = make_path_posix(
                    getattr(self.path, 'path',
                            getattr(self.path, 'name', 'file')))
            elif self.filesystem is None:
                file_open = open_files(self.path, mode='rb', **options)
                assert len(file_open) == 1
                file_open = file_open[0]
                self.filesystem = file_open.fs
            else:
                file_open = self.filesystem.open(self.path, mode='rb')
            self._dir = get_dir(self.path)

            with file_open as f:
                text = f.read().decode()
            if "!template " in text:
                logger.warning("Use of '!template' deprecated - fixing")
                text = text.replace('!template ', '')
            self.parse(text)
Example #3
def jsonl_file(request, tmp_path) -> str:
    data = [{"hello": "world"}, [1, 2, 3]]
    file_path = str(tmp_path / "1.jsonl")
    file_path += EXTENSIONS.get(request.param, "")
    with open_files([file_path], mode="wt", compression=request.param)[0] as f:
        f.write("\n".join(json.dumps(row) for row in data))
    return file_path
Example #4
def test_multizarr(generate_mzz):
    """Test creating a combined reference file with MultiZarrToZarr"""
    mzz = generate_mzz
    test_dict = mzz.translate()

    m = fsspec.get_mapper("reference://",
                          fo=test_dict,
                          remote_protocol="s3",
                          remote_options=so)
    ds = xr.open_dataset(m,
                         engine="zarr",
                         backend_kwargs=dict(consolidated=False))

    with fsspec.open_files(urls, **so) as fs:
        expts = [xr.open_dataset(f, engine="h5netcdf") for f in fs]
        expected = xr.concat(expts, dim="time").drop_vars("crs")

        assert set(ds) == set(expected)
        for name in ds:
            exp = {
                k: v.tolist() if isinstance(v, np.ndarray) else v
                for k, v in expected[name].attrs.items()
            }
            assert dict(ds[name].attrs) == exp
        for coo in ds.coords:
            if ds[coo].dtype.kind == "M":
                assert (ds[coo].values - expected[coo].values < np.array(
                    [1], dtype="<m8[ms]")).all()
            else:
                assert np.allclose(ds[coo].values, expected[coo].values)
Example #5
    def _get_schema(self):
        from fsspec import open_files
        import dask.array as da
        if self._arr is None:
            path = self._get_cache(self.path)[0]

            files = open_files(path, 'rb', compression=None,
                               **self.storage)
            if self.shape is None:
                arr = NumpyAccess(files[0])
                self.shape = arr.shape
                self.dtype = arr.dtype
                arrs = [arr] + [NumpyAccess(f, self.shape, self.dtype,
                                            offset=arr.offset)
                                for f in files[1:]]
            else:
                arrs = [NumpyAccess(f, self.shape, self.dtype)
                        for f in files]
            self.chunks = (self._chunks, ) + (-1, ) * (len(self.shape) - 1)
            self._arrs = [da.from_array(arr, self.chunks) for arr in arrs]

            if len(self._arrs) > 1:
                self._arr = da.stack(self._arrs)
            else:
                self._arr = self._arrs[0]
            self.chunks = self._arr.chunks
        return Schema(dtype=str(self.dtype), shape=self.shape,
                      extra_metadata=self.metadata,
                      npartitions=self._arr.npartitions,
                      chunks=self.chunks)
Example #6
def json_file(request, tmp_path) -> str:
    data = {"hello": "world"}
    file_path = str(tmp_path / "1.json")
    file_path += EXTENSIONS.get(request.param, "")
    with open_files([file_path], mode="wt", compression=request.param)[0] as f:
        f.write(json.dumps(data))
    return file_path
Example #7
    def _load_object_detection_api(self, model_spec: ObjectDetectionAPI_ModelSpec):
        import tensorflow as tf
        from object_detection.utils import config_util
        from object_detection.builders import model_builder
        temp_dir = tempfile.TemporaryDirectory()
        temp_dir_path = Path(temp_dir.name)
        model_config_path = temp_dir_path / Pathy(model_spec.config_path).name
        with open(model_config_path, 'wb') as out:
            with fsspec.open(model_spec.config_path, 'rb') as src:
                out.write(src.read())
        src_checkpoint_path = Pathy(model_spec.checkpoint_path)
        checkpoint_path = temp_dir_path / src_checkpoint_path.name
        for src_file in fsspec.open_files(f"{src_checkpoint_path}*", 'rb'):
            out_file = temp_dir_path / Pathy(src_file.path).name
            with open(out_file, 'wb') as out:
                with src_file as src:
                    out.write(src.read())
        configs = config_util.get_configs_from_pipeline_file(
            pipeline_config_path=str(model_config_path)
        )
        model_config = configs['model']
        self.model = model_builder.build(
            model_config=model_config, is_training=False
        )
        ckpt = tf.compat.v2.train.Checkpoint(model=self.model)
        ckpt.restore(str(checkpoint_path)).expect_partial()
        self.input_dtype = np.float32

        # Run model through a dummy image so that variables are created
        zeros = np.zeros([640, 640, 3])
        self._raw_predict_single_image_default(zeros)

        temp_dir.cleanup()
Example #8
 def __init__(self, fo="", mode="r", **storage_options):
     """
     Parameters
     ----------
     fo: str or file-like
         Contains ZIP, and must exist. If a str, will fetch file using
         `open_files()`, which must return one file exactly.
     mode: str
         Currently, only 'r' accepted
     storage_options: key-value
         May be credentials, e.g., `{'auth': ('username', 'pword')}` or any
         other parameters for requests
     """
     if self._cached:
         return
     AbstractFileSystem.__init__(self)
     if mode != "r":
         raise ValueError("Only read from zip files accepted")
     self.in_fo = fo
     if isinstance(fo, str):
         files = open_files(fo)
         if len(files) != 1:
             raise ValueError('Path "{}" did not resolve to exactly '
                              'one file: "{}"'.format(fo, files))
         fo = files[0]
     self.fo = fo.__enter__()  # the whole instance is a context
     self.zip = zipfile.ZipFile(self.fo)
     self.block_size = storage_options.get("block_size", DEFAULT_BLOCK_SIZE)
     self.dir_cache = None
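
A minimal usage sketch for the constructor documented above, assuming this class is the one fsspec registers under the "zip" protocol; "archive.zip" and "member.txt" are hypothetical names.

import fsspec

# `fo` may be a path string (resolved via open_files) or an already-open file-like object
fs = fsspec.filesystem("zip", fo="archive.zip")
print(fs.ls("/"))                 # list the archive members
with fs.open("member.txt") as f:  # read a single member as bytes
    print(f.read())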
Example #9
    def _load_metadata(self):
        import dask.dataframe as dd
        import dask.delayed
        from fsspec import open_files
        self.files = open_files(self.url, **self.storage_options)

        def read_a_file(open_file, reader, kwargs):
            with open_file as of:
                df = reader(of, **kwargs)
                df['path'] = open_file.path
                return df

        if self.dataframe is None:
            self.parts = [
                dask.delayed(read_a_file)(open_file, self.reader, self.kwargs)
                for open_file in self.files
            ]
            self.dataframe = dd.from_delayed(self.parts)
            self.npartitions = self.dataframe.npartitions
            self.shape = (None, len(self.dataframe.columns))
            self.dtype = self.dataframe.dtypes.to_dict()
            self._schema = Schema(npartitions=self.npartitions,
                                  extra_metadata=self.metadata,
                                  dtype=self.dtype,
                                  shape=self.shape,
                                  datashape=None)
        return self._schema
Example #10
File: glue.py Project: joleenf/polar2grid
def _fsfiles_for_s3(input_filenames):
    """Convert S3 URLs to something Satpy can understand and use.

    Examples:
        Example S3 URLs (no caching):

        .. code-block:: bash

            polar2grid.sh ... -f s3://noaa-goes16/ABI-L1b-RadC/2019/001/17/*_G16_s20190011702186*

        Example S3 URLs using fsspec caching:

        .. code-block:: bash

            polar2grid.sh ... -f simplecache::s3://noaa-goes16/ABI-L1b-RadC/2019/001/17/*_G16_s20190011702186*

    """
    import fsspec
    from satpy.readers import FSFile

    kwargs = {"anon": True}
    if "simplecache::" in input_filenames[0]:
        kwargs = {"s3": kwargs}
    for open_file in fsspec.open_files(input_filenames, **kwargs):
        yield FSFile(open_file)
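
A hedged usage sketch of the helper above, reusing the simplecache S3 glob from its own docstring; the listing result is illustrative only.

urls = ["simplecache::s3://noaa-goes16/ABI-L1b-RadC/2019/001/17/*_G16_s20190011702186*"]
scene_files = list(_fsfiles_for_s3(urls))  # satpy.readers.FSFile objects, fetched anonymously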
Example #11
def test_multi_cache_chain(protocol):
    import zipfile

    d = tempfile.mkdtemp()
    fn = os.path.join(d, "test.zip")
    zipfile.ZipFile(fn, mode="w").open("test", "w").write(b"hello")

    with fsspec.open_files(f"zip://test::{protocol}::file://{fn}") as files:
        assert d not in files[0]._fileobj._file.name
        assert files[0].read() == b"hello"

    # special test: the path contains the string "file:"
    fn = os.path.join(d, "file.zip")
    zipfile.ZipFile(fn, mode="w").open("file", "w").write(b"hello")
    with fsspec.open_files(f"zip://file::{protocol}::file://{fn}") as files:
        assert d not in files[0]._fileobj._file.name
        assert files[0].read() == b"hello"
Example #12
File: utils.py Project: BENR0/satpy
def _filenames_to_fsfile(filenames, storage_options):
    import fsspec

    from satpy.readers import FSFile

    if filenames:
        fsspec_files = fsspec.open_files(filenames, **storage_options)
        return [FSFile(f) for f in fsspec_files]
    return []
Example #13
File: cache.py Project: paalka/intake
    def _make_files(self, urlpath, **kwargs):
        import tempfile
        d = tempfile.mkdtemp()
        from fsspec import open_files

        self._ensure_cache_dir()
        self._urlpath = urlpath
        files_in = open_files(urlpath, 'rb', **self._storage_options)
        files_out = [
            open_files(
                [make_path_posix(os.path.join(d, os.path.basename(f.path)))],
                'wb')[0] for f in files_in
        ]
        super(CompressedCache, self)._load(files_in,
                                           files_out,
                                           urlpath,
                                           meta=False)
        return files_in, files_out
Example #14
def get_label_to_base_label_image(
    base_labels_images: Union[str, Path],
    label_to_description: Union[str, Path, Dict[str, str]] = None,
    add_label_to_image: bool = False,
    make_labels_for_these_class_names_too: List[str] = [
    ]  # add known description to classes without base images
) -> Dict[str, np.ndarray]:
    if base_labels_images is None:
        return None

    base_labels_images_files = fsspec.open_files(str(base_labels_images))
    ann_class_names_files = [
        Pathy(base_label_image_file.path).stem
        for base_label_image_file in base_labels_images_files
    ]
    unique_ann_class_names = set(ann_class_names_files)
    if 'unknown' not in unique_ann_class_names:
        raise ValueError(
            f'"{base_labels_images}" must have image with name "unknown.*"')
    unknown_image_path = base_labels_images_files[ann_class_names_files.index(
        'unknown')]
    label_to_base_label_image = defaultdict(lambda: unknown_image_path)
    label_to_base_label_image['unknown'] = unknown_image_path
    logger.info(f"Loading base labels images from {base_labels_images}...")
    for label in tqdm(
            list(unique_ann_class_names) +
            list(set(make_labels_for_these_class_names_too))):
        if label in unique_ann_class_names:
            base_label_image = base_labels_images_files[
                ann_class_names_files.index(label)]
        else:
            base_label_image = label_to_base_label_image['unknown']
        label_to_base_label_image[label] = base_label_image

    def label_to_base_label_image_func(
            label: str,
            label_to_description: Union[str, Path,
                                        Dict[str, str]] = label_to_description,
            add_label_to_image: bool = add_label_to_image):
        base_label_image = open_image(label_to_base_label_image[label])
        if label_to_description is not None:
            if isinstance(label_to_description, str) or isinstance(
                    label_to_description, Path):
                label_to_description = get_label_to_description(
                    label_to_description_dict=label_to_description)
            base_label_image = get_base_label_image_with_description(
                base_label_image=base_label_image,
                label=label,
                description=label_to_description[label])
        elif add_label_to_image:
            base_label_image = get_base_label_image_with_description(
                base_label_image=base_label_image, label=label, description='')
        return base_label_image

    return label_to_base_label_image_func
Example #15
def test_complex(ftp_writable):
    host, port, user, pw = ftp_writable
    files = open_files('ftp:///ou*', host=host, port=port,
                       username=user, password=pw,
                       block_size=10000)
    assert len(files) == 1
    with files[0] as fo:
        assert fo.read(10) == b'hellohello'
        assert len(fo.cache) == 10010
        assert fo.read(2) == b'he'
        assert fo.tell() == 12
Example #16
def test_complex(ftp_writable, cache_type):
    from fsspec.core import BytesCache
    host, port, user, pw = ftp_writable
    files = open_files('ftp:///ou*', host=host, port=port,
                       username=user, password=pw,
                       block_size=10000, cache_type=cache_type)
    assert len(files) == 1
    with files[0] as fo:
        assert fo.read(10) == b'hellohello'
        if isinstance(fo.cache, BytesCache):
            assert len(fo.cache.cache) == 10010
        assert fo.read(2) == b'he'
        assert fo.tell() == 12
Example #17
def ftp_server(ftpserver: ProcessFTPServer) -> List[URL]:
    faker = Faker()

    files = ["file_1", "file_2", "file_3"]
    ftp_server_base_url = ftpserver.get_login_data(style="url")
    list_of_file_urls = [f"{ftp_server_base_url}/{filename}.txt" for filename in files]
    with fsspec.open_files(list_of_file_urls, "wt") as open_files:
        for index, fp in enumerate(open_files):
            fp.write(f"This is the file contents of '{files[index]}'\n")
            for s in faker.sentences():
                fp.write(f"{s}\n")

    return [URL(f) for f in list_of_file_urls]
Example #18
def test_chained_fs_multi():
    d1 = tempfile.mkdtemp()
    d2 = tempfile.mkdtemp()
    f1 = os.path.join(d1, "f1")
    f2 = os.path.join(d1, "f2")
    with open(f1, "wb") as f:
        f.write(b"test1")
    with open(f2, "wb") as f:
        f.write(b"test2")

    of = fsspec.open_files(
        f"simplecache::file://{d1}/*",
        simplecache={
            "cache_storage": d2,
            "same_names": True
        },
    )
    with of[0] as f:
        assert f.read() == b"test1"
    with of[1] as f:
        assert f.read() == b"test2"

    assert sorted(os.listdir(d2)) == ["f1", "f2"]

    d2 = tempfile.mkdtemp()

    of = fsspec.open_files(
        [f"simplecache::file://{f1}", f"simplecache::file://{f2}"],
        simplecache={
            "cache_storage": d2,
            "same_names": True
        },
    )
    with of[0] as f:
        assert f.read() == b"test1"
    with of[1] as f:
        assert f.read() == b"test2"

    assert sorted(os.listdir(d2)) == ["f1", "f2"]
Example #19
def test_multilevel_chained_fs():
    """This test reproduces fsspec/filesystem_spec#334"""
    import zipfile

    d1 = tempfile.mkdtemp()
    f1 = os.path.join(d1, "f1.zip")
    with zipfile.ZipFile(f1, mode="w") as z:
        # filename, content
        z.writestr("foo.txt", "foo.txt")
        z.writestr("bar.txt", "bar.txt")

    # We expected this to be the correct syntax
    with pytest.raises(IsADirectoryError):
        of = fsspec.open_files(f"zip://*.txt::simplecache::file://{f1}")
        assert len(of) == 2

    # But this is what is actually valid...
    of = fsspec.open_files(f"zip://*.txt::simplecache://{f1}::file://")

    assert len(of) == 2
    for open_file in of:
        with open_file as f:
            assert f.read().decode("utf-8") == f.name
Example #20
def last_checkpoint(
        checkpoints_dir: str,
        mode: str = 'rb') -> Generator[CheckpointSpecType, None, None]:
    """Return the last checkpoint or None."""
    checkpoint_glob = os.path.join(checkpoints_dir, '*.pt')
    files = fsspec.open_files(checkpoint_glob, 'rb')
    if files:
        round_id = chain.from_iterable(
            ROUND_EXPRESSION.findall(f.path) for f in files)
        round_number, i = max((int(r), i) for i, r in enumerate(round_id))
        with files[i] as checkpoint_file:
            yield checkpoint_file, round_number
    else:
        yield None, None
Example #21
 def _open_dataset(self):
     """
     Open dataset using geopandas.
     """
     if self._use_fsspec:
         with fsspec.open_files(self.urlpath, **self.storage_options) as f:
             f = self._resolve_single_file(f) if len(f) > 1 else f[0]
             self._dataframe = geopandas.read_parquet(
                 f,
                 **self._geopandas_kwargs,
             )
     else:
         self._dataframe = geopandas.read_parquet(self.urlpath,
                                                  **self._geopandas_kwargs)
Example #22
    def save(self, url, storage_options=None):
        """
        Output this catalog to a file as YAML

        Parameters
        ----------
        url : str
            Location to save to, perhaps remote
        storage_options : dict
            Extra arguments for the file-system
        """
        from fsspec import open_files
        with open_files([url], **(storage_options or {}), mode='wt')[0] as f:
            f.write(self.serialize())
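
A hedged usage sketch for save(); the catalog instance, bucket and credentials keyword are hypothetical, and storage_options is simply forwarded to open_files.

cat.save("s3://my-bucket/catalogs/cat.yaml", storage_options={"anon": False})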
Example #23
def test_multi_cache(protocol):
    with fsspec.open_files("memory://file*", "wb", num=2) as files:
        for f in files:
            f.write(b"hello")

    d2 = tempfile.mkdtemp()
    lurl = fsspec.open_local(
        f"{protocol}::memory://file*",
        mode="rb",
        **{protocol: {
            "cache_storage": d2,
            "same_names": True
        }},
    )
    assert all(d2 in u for u in lurl)
    assert all(os.path.basename(f) in ["file0", "file1"] for f in lurl)
    assert all(open(u, "rb").read() == b"hello" for u in lurl)

    d2 = tempfile.mkdtemp()
    lurl = fsspec.open_files(
        f"{protocol}::memory://file*",
        mode="rb",
        **{protocol: {
            "cache_storage": d2,
            "same_names": True
        }},
    )
    with lurl as files:
        for f in files:
            assert os.path.basename(f.name) in ["file0", "file1"]
            assert f.read() == b"hello"
    fs = fsspec.filesystem("memory")
    fs.store.clear()
    with lurl as files:
        for f in files:
            assert os.path.basename(f.name) in ["file0", "file1"]
            assert f.read() == b"hello"
Example #24
def test_chained_fo():
    import zipfile

    d1 = tempfile.mkdtemp()
    f1 = os.path.join(d1, "temp.zip")
    d3 = tempfile.mkdtemp()
    with zipfile.ZipFile(f1, mode="w") as z:
        z.writestr("afile", b"test")

    of = fsspec.open(f"zip://afile::file://{f1}")
    with of as f:
        assert f.read() == b"test"

    of = fsspec.open_files(f"zip://*::file://{f1}")
    with of[0] as f:
        assert f.read() == b"test"

    of = fsspec.open_files(
        f"simplecache::zip://*::file://{f1}",
        simplecache={"cache_storage": d3, "same_names": True},
    )
    with of[0] as f:
        assert f.read() == b"test"
    assert "afile" in os.listdir(d3)
Example #25
    def _determine_dims(self):
        logger.debug("open mappers")

        # If self.path is a list of dictionaries, pass them directly to fsspec.filesystem
        import collections.abc
        if isinstance(self.path[0], collections.abc.Mapping):
            fo_list = self.path

        # If self.path is list of files, open the files and load the json as a dictionary
        else:
            with fsspec.open_files(self.path, **self.storage_options) as ofs:
                fo_list = [json.load(of) for of in ofs]

        fss = [
            fsspec.filesystem("reference",
                              fo=fo,
                              remote_protocol=self.remote_protocol,
                              remote_options=self.remote_options)
            for fo in fo_list
        ]
        self.fs = fss[0].fs
        mappers = [fs.get_mapper("") for fs in fss]

        logger.debug("open first two datasets")
        xr_kwargs_copy = self.xr_kwargs.copy()

        # Add consolidated=False to xr kwargs if not explicitly given by user
        # needed to suppress zarr open warnings
        if (version.parse(xr.__version__) >= version.parse("0.19.0")
                and 'consolidated' not in xr_kwargs_copy):
            xr_kwargs_copy['consolidated'] = False

        dss = [
            xr.open_dataset(m, engine="zarr", chunks={}, **xr_kwargs_copy)
            for m in mappers[:2]
        ]

        if self.preprocess:
            logger.debug("preprocess")
            dss = [self.preprocess(d) for d in dss]
        logger.debug("concat")
        ds = xr.concat(dss, **self.concat_kwargs)
        ds0 = dss[0]
        self.extra_dims = set(ds.dims) - set(ds0.dims)
        self.concat_dims = set(k for k, v in ds.dims.items()
                               if k in ds0.dims and v / ds0.dims[k] == 2)
        self.same_dims = set(ds.dims) - self.extra_dims - self.concat_dims
        return ds, ds0, fss
Example #26
    def _get_schema(self):
        from fsspec import open_files
        if self._files is None:

            urlpath = self._get_cache(self._urlpath)[0]

            self._files = open_files(urlpath,
                                     mode=self.mode,
                                     encoding=self.encoding,
                                     compression=self.compression,
                                     **self._storage_options)
            self.npartitions = len(self._files)
        return base.Schema(dtype=None,
                           shape=(None, ),
                           npartitions=self.npartitions,
                           extra_metadata=self.metadata)
Example #27
File: io.py Project: PrettyWood/polars
def _prepare_file_arg(
    file: Union[str, List[str], TextIO, Path, BinaryIO, bytes], **kwargs: Any
) -> ContextManager[Union[str, BinaryIO, List[str], List[BinaryIO]]]:
    """
    Utility for read_[csv, parquet]. (not to be used by scan_[csv, parquet]).
    Returned value is always usable as a context.

    A `StringIO`, `BytesIO` file is returned as a `BytesIO`.
    A local path is returned as a string.
    An http URL is read into a buffer and returned as a `BytesIO`.

    When fsspec is installed, remote file(s) is (are) opened with
    `fsspec.open(file, **kwargs)` or `fsspec.open_files(file, **kwargs)`.
    """

    # Small helper to use a variable as context
    @contextmanager
    def managed_file(file: Any) -> Iterator[Any]:
        try:
            yield file
        finally:
            pass

    if isinstance(file, StringIO):
        return BytesIO(file.read().encode("utf8"))
    if isinstance(file, BytesIO):
        return managed_file(file)
    if isinstance(file, Path):
        return managed_file(format_path(file))
    if isinstance(file, str):
        if _WITH_FSSPEC:
            if infer_storage_options(file)["protocol"] == "file":
                return managed_file(format_path(file))
            return fsspec.open(file, **kwargs)
        if file.startswith("http"):
            return _process_http_file(file)
    if isinstance(file, list) and bool(file) and all(
            isinstance(f, str) for f in file):
        if _WITH_FSSPEC:
            if all(
                    infer_storage_options(f)["protocol"] == "file"
                    for f in file):
                return managed_file([format_path(f) for f in file])
            return fsspec.open_files(file, **kwargs)
    if isinstance(file, str):
        file = format_path(file)
    return managed_file(file)
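
A hedged usage sketch of the helper above; per its docstring the returned value is always usable as a context manager, and a local path comes back as a plain string ("data.csv" is a hypothetical local file).

with _prepare_file_arg("data.csv") as source:
    print(type(source))  # str for a local path; BytesIO or fsspec OpenFile(s) otherwise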
Example #28
File: cache.py Project: paalka/intake
    def _load(self, _, __, urlpath, meta=True):
        import subprocess
        from fsspec import open_files

        path = os.path.join(self._cache_dir, self._hash(urlpath))
        dat, part = os.path.split(urlpath)
        cmd = ['dat', 'clone', dat, path, '--no-watch']
        try:
            subprocess.call(cmd, stdout=subprocess.PIPE)
        except (IOError, OSError):  # pragma: no cover
            logger.info('Calling DAT failed')
            raise
        newpath = os.path.join(path, part)

        if meta:
            for of in open_files(newpath):
                self._log_metadata(urlpath, urlpath, of.path)
Example #29
 def _data_to_source(cat, path, **kwargs):
     from intake.catalog.local import YAMLFileCatalog
     from fsspec import open_files
     import yaml
     if not isinstance(cat, Catalog):
         raise NotImplementedError
     out = {}
     for name in cat:
         entry = cat[name]
         out[name] = entry.__getstate__()
         out[name]['parameters'] = [up._captured_init_kwargs for up
                                    in entry._user_parameters]
         out[name]['kwargs'].pop('parameters')
     fn = posixpath.join(path, 'cat.yaml')
     with open_files([fn], 'wt')[0] as f:
         yaml.dump({'sources': out}, f)
     return YAMLFileCatalog(fn)
Example #30
 def _data_to_source(cat, path, **kwargs):
     from intake.catalog.local import YAMLFileCatalog
     from fsspec import open_files
     import yaml
     if not isinstance(cat, Catalog):
         raise NotImplementedError
     out = {}
     # reach down into the private state because we apparently need the
     # Entry here rather than the public facing DataSource objects.
     for name, entry in cat._entries.items():
         out[name] = entry.__getstate__()
         out[name]['parameters'] = [up._captured_init_kwargs for up
                                    in entry._user_parameters]
         out[name]['kwargs'].pop('parameters')
     fn = posixpath.join(path, 'cat.yaml')
     with open_files([fn], 'wt')[0] as f:
         yaml.dump({'sources': out}, f)
     return YAMLFileCatalog(fn)