def _persist(source, path, **kwargs):
    """Serialise every entry of ``source`` into a YAML catalog under ``path``.

    The pickled state of each named entry is collected into a mapping and
    dumped to ``<path>/cat.yaml``; a catalog object backed by that file is
    returned.
    """
    from intake.catalog.local import YAMLFileCatalog
    from dask.bytes.core import open_files
    import yaml

    # Capture the serialisable state of each named entry up front.
    states = {name: source[name].__getstate__() for name in source}
    target = posixpath.join(path, 'cat.yaml')
    with open_files([target], 'wt')[0] as stream:
        yaml.dump({'sources': states}, stream)
    return YAMLFileCatalog(target)
def add_source_to_catalog(source: DataSource, catalog_file: Union[Path, str]):
    """
    API to add new data source to catalog_file.

    Args:
        source: data source to add.
        catalog_file: file where data source to be added. if file doesn't
            exist, file will be created.

    Examples:
        >>> import os
        >>> from intake.source.csv import CSVSource
        >>> import yaml
        >>> wd = os.getcwd()
        >>> source1 = CSVSource('test/temp/test1.csv')
        >>> source1.name = 'csv-test1'
        >>> # create new catalog with source
        >>> cfile = 'test/temp/test-catalog.yaml'
        >>> add_source_to_catalog(source1, cfile)
        >>> print(yaml.safe_load(Path(cfile).open().read().replace(wd, '')))
        ... # doctest: +NORMALIZE_WHITESPACE
        {'metadata': {}, 'sources': {'csv-test1': {'args': {'urlpath':
        'test/temp/test1.csv'}, 'description': '', 'driver':
        'intake.source.csv.CSVSource', 'metadata': {}}}}
        >>> source2 = CSVSource('test/temp/test2.csv')
        >>> source2.name = 'csv-test2'
        >>> add_source_to_catalog(source2, cfile)
        >>> print(yaml.safe_load(Path(cfile).open().read().replace(wd, '')))
        ... # doctest: +NORMALIZE_WHITESPACE
        {'metadata': {}, 'sources': {'csv-test1': {'args': {'urlpath':
        'test/temp/test1.csv'}, 'description': '', 'driver':
        'intake.source.csv.CSVSource', 'metadata': {'catalog_dir':
        '/test/temp/'}}, 'csv-test2': {'args': {'urlpath':
        'test/temp/test2.csv'}, 'description': '', 'driver':
        'intake.source.csv.CSVSource', 'metadata': {}}}}
        >>> os.remove(cfile)
    """
    target = str(local_or_s3_path(catalog_file))
    try:
        catalog: YAMLFileCatalog = YAMLFileCatalog(path=target)
    except FileNotFoundError:
        # Catalog file does not exist yet: materialise an empty catalog
        # there first, then reopen it.
        open_catalog().save(url=target)
        catalog: YAMLFileCatalog = YAMLFileCatalog(path=target)
    catalog.add(source, name=source.name)
def _persist(source, path, **kwargs):
    """Write the entries of ``source`` to a ``cat.yaml`` catalog in ``path``.

    Each entry's pickled state is augmented with the captured init kwargs of
    its user parameters; the live (non-serialisable) ``parameters`` kwarg is
    dropped from the stored state.

    Returns:
        YAMLFileCatalog backed by the written ``cat.yaml`` file.
    """
    from intake.catalog.local import YAMLFileCatalog
    from dask.bytes.core import open_files
    import yaml
    out = {}
    for name in source:
        entry = source[name]
        out[name] = entry.__getstate__()
        out[name]['parameters'] = [
            up._captured_init_kwargs for up in entry._user_parameters
        ]
        # Fix: pop with a default — entries whose captured state has no
        # 'parameters' kwarg previously raised KeyError here.
        out[name]['kwargs'].pop('parameters', None)
    fn = posixpath.join(path, 'cat.yaml')
    with open_files([fn], 'wt')[0] as f:
        yaml.dump({'sources': out}, f)
    return YAMLFileCatalog(fn)
def _data_to_source(cat, path, **kwargs):
    """Persist a Catalog's entries as a ``cat.yaml`` file under ``path``.

    Args:
        cat: must be a ``Catalog``; anything else raises NotImplementedError.
        path: directory-like prefix where ``cat.yaml`` will be written.

    Returns:
        YAMLFileCatalog backed by the written file.

    Raises:
        NotImplementedError: if ``cat`` is not a ``Catalog`` instance.
    """
    from intake.catalog.local import YAMLFileCatalog
    from fsspec import open_files
    import yaml
    if not isinstance(cat, Catalog):
        raise NotImplementedError
    out = {}
    for name in cat:
        entry = cat[name]
        out[name] = entry.__getstate__()
        out[name]['parameters'] = [up._captured_init_kwargs
                                   for up in entry._user_parameters]
        # Fix: pop with a default — entries whose captured state has no
        # 'parameters' kwarg previously raised KeyError here.
        out[name]['kwargs'].pop('parameters', None)
    fn = posixpath.join(path, 'cat.yaml')
    with open_files([fn], 'wt')[0] as f:
        yaml.dump({'sources': out}, f)
    return YAMLFileCatalog(fn)
def _data_to_source(cat, path, **kwargs):
    """Persist a Catalog's entries as a ``cat.yaml`` file under ``path``.

    Only ``Catalog`` instances are supported; a ``YAMLFileCatalog`` over the
    written file is returned.
    """
    from intake.catalog.local import YAMLFileCatalog
    from fsspec import open_files
    import yaml

    if not isinstance(cat, Catalog):
        raise NotImplementedError

    serialised = {}
    # reach down into the private state because we apparently need the
    # Entry here rather than the public facing DataSource objects.
    for key, ent in cat._entries.items():
        state = ent.__getstate__()
        state['parameters'] = [p._captured_init_kwargs
                               for p in ent._user_parameters]
        state['kwargs'].pop('parameters')
        serialised[key] = state

    destination = posixpath.join(path, 'cat.yaml')
    with open_files([destination], 'wt')[0] as handle:
        yaml.dump({'sources': serialised}, handle)
    return YAMLFileCatalog(destination)