Example #1
0
 def _persist(source, path, **kwargs):
     """Dump the state of every entry in ``source`` to ``<path>/cat.yaml``.

     Each name produced by iterating ``source`` is looked up with
     ``source[name]`` and the result of that entry's ``__getstate__()`` is
     stored under the same name. The resulting mapping is written as YAML
     beneath a top-level ``sources`` key.

     Args:
         source: iterable of names that also supports ``source[name]``.
         path: location prefix; ``cat.yaml`` is appended with a POSIX
             separator.
         **kwargs: accepted but not used by this function.

     Returns:
         A ``YAMLFileCatalog`` constructed from the file just written.
     """
     from intake.catalog.local import YAMLFileCatalog
     from dask.bytes.core import open_files
     import yaml

     states = {key: source[key].__getstate__() for key in source}
     target = posixpath.join(path, 'cat.yaml')
     # A single path is passed in, so take the first item of the result.
     handle = open_files([target], 'wt')[0]
     with handle as stream:
         yaml.dump({'sources': states}, stream)
     return YAMLFileCatalog(target)
Example #2
0
def add_source_to_catalog(source: DataSource, catalog_file: Union[Path, str]):
    """
    Add a data source to the YAML catalog stored at ``catalog_file``.

    The catalog is opened as a ``YAMLFileCatalog``. If opening raises
    ``FileNotFoundError``, an empty catalog is saved to that location first
    and then opened. The source is added under ``source.name``.

    Args:
        source: data source to add; its ``name`` attribute is used as the
            entry name.
        catalog_file: file where the data source is to be added. If the file
            doesn't exist, it will be created. The value is passed through
            ``local_or_s3_path`` before use.

    Returns:
        None.

    Examples:
        >>> import os
        >>> from intake.source.csv import CSVSource
        >>> import yaml
        >>> wd = os.getcwd()
        >>> source1 = CSVSource('test/temp/test1.csv')
        >>> source1.name = 'csv-test1'
        >>> # create new catalog with source
        >>> cfile = 'test/temp/test-catalog.yaml'
        >>> add_source_to_catalog(source1, cfile)
        >>> print(yaml.safe_load(Path(cfile).open().read().replace(wd, '')))
        ... # doctest: +NORMALIZE_WHITESPACE
        {'metadata': {}, 'sources': {'csv-test1': {'args': {'urlpath': 'test/temp/test1.csv'}, 'description': '', 'driver': 'intake.source.csv.CSVSource', 'metadata': {}}}}
        >>> source2 = CSVSource('test/temp/test2.csv')
        >>> source2.name = 'csv-test2'
        >>> add_source_to_catalog(source2, cfile)
        >>> print(yaml.safe_load(Path(cfile).open().read().replace(wd, '')))
        ... # doctest: +NORMALIZE_WHITESPACE
        {'metadata': {}, 'sources': {'csv-test1': {'args': {'urlpath': 'test/temp/test1.csv'}, 'description': '', 'driver': 'intake.source.csv.CSVSource', 'metadata': {'catalog_dir': '/test/temp/'}}, 'csv-test2': {'args': {'urlpath': 'test/temp/test2.csv'}, 'description': '', 'driver': 'intake.source.csv.CSVSource', 'metadata': {}}}}
        >>> os.remove(cfile)
    """
    target = str(local_or_s3_path(catalog_file))
    try:
        catalog = YAMLFileCatalog(path=target)
    except FileNotFoundError:
        # Nothing at the target yet: write an empty catalog there, then
        # open the freshly created file.
        open_catalog().save(url=target)
        catalog = YAMLFileCatalog(path=target)
    catalog.add(source, name=source.name)
Example #3
0
 def _persist(source, path, **kwargs):
     """Dump every entry of ``source``, with its parameters, to ``cat.yaml``.

     For each name produced by iterating ``source``, the entry's
     ``__getstate__()`` result is extended with a ``parameters`` list built
     from ``_captured_init_kwargs`` of each item in the entry's
     ``_user_parameters``. The ``parameters`` key is then removed from the
     state's ``kwargs``. The collected states are written as YAML beneath a
     top-level ``sources`` key.

     Args:
         source: iterable of names that also supports ``source[name]``.
         path: location prefix; ``cat.yaml`` is appended with a POSIX
             separator.
         **kwargs: accepted but not used by this function.

     Returns:
         A ``YAMLFileCatalog`` constructed from the file just written.

     Raises:
         KeyError: if a state's ``kwargs`` has no ``parameters`` key
             (assumes ``kwargs`` is a plain dict -- confirm).
     """
     from intake.catalog.local import YAMLFileCatalog
     from dask.bytes.core import open_files
     import yaml

     states = {}
     for key in source:
         item = source[key]
         state = item.__getstate__()
         states[key] = state
         captured = [param._captured_init_kwargs
                     for param in item._user_parameters]
         state['parameters'] = captured
         # The parameters now live at the top level of the state, so the
         # copy nested under 'kwargs' is dropped.
         state['kwargs'].pop('parameters')

     target = posixpath.join(path, 'cat.yaml')
     # A single path is passed in, so take the first item of the result.
     handle = open_files([target], 'wt')[0]
     with handle as stream:
         yaml.dump({'sources': states}, stream)
     return YAMLFileCatalog(target)
Example #4
0
 def _data_to_source(cat, path, **kwargs):
     """Serialise the entries of ``cat`` to ``<path>/cat.yaml`` and reopen it.

     For each name produced by iterating ``cat``, the entry obtained with
     ``cat[name]`` contributes its ``__getstate__()`` result, extended with
     a ``parameters`` list built from ``_captured_init_kwargs`` of each item
     in the entry's ``_user_parameters``. The ``parameters`` key is then
     removed from the state's ``kwargs``. The collected states are written
     as YAML beneath a top-level ``sources`` key.

     Args:
         cat: must be a ``Catalog`` instance.
         path: location prefix; ``cat.yaml`` is appended with a POSIX
             separator.
         **kwargs: accepted but not used by this function.

     Returns:
         A ``YAMLFileCatalog`` constructed from the file just written.

     Raises:
         NotImplementedError: if ``cat`` is not a ``Catalog``.
         KeyError: if a state's ``kwargs`` has no ``parameters`` key
             (assumes ``kwargs`` is a plain dict -- confirm).
     """
     from intake.catalog.local import YAMLFileCatalog
     from fsspec import open_files
     import yaml

     if not isinstance(cat, Catalog):
         raise NotImplementedError

     states = {}
     for key in cat:
         item = cat[key]
         state = item.__getstate__()
         states[key] = state
         captured = [param._captured_init_kwargs
                     for param in item._user_parameters]
         state['parameters'] = captured
         # The parameters now live at the top level of the state, so the
         # copy nested under 'kwargs' is dropped.
         state['kwargs'].pop('parameters')

     target = posixpath.join(path, 'cat.yaml')
     # A single path is passed in, so take the first item of the result.
     handle = open_files([target], 'wt')[0]
     with handle as stream:
         yaml.dump({'sources': states}, stream)
     return YAMLFileCatalog(target)
Example #5
0
 def _data_to_source(cat, path, **kwargs):
     """Serialise the entries of ``cat`` to ``<path>/cat.yaml`` and reopen it.

     Every ``(name, entry)`` pair in ``cat._entries`` contributes the
     entry's ``__getstate__()`` result, extended with a ``parameters`` list
     built from ``_captured_init_kwargs`` of each item in the entry's
     ``_user_parameters``. The ``parameters`` key is then removed from the
     state's ``kwargs``. The collected states are written as YAML beneath a
     top-level ``sources`` key.

     Args:
         cat: must be a ``Catalog`` instance.
         path: location prefix; ``cat.yaml`` is appended with a POSIX
             separator.
         **kwargs: accepted but not used by this function.

     Returns:
         A ``YAMLFileCatalog`` constructed from the file just written.

     Raises:
         NotImplementedError: if ``cat`` is not a ``Catalog``.
         KeyError: if a state's ``kwargs`` has no ``parameters`` key
             (assumes ``kwargs`` is a plain dict -- confirm).
     """
     from intake.catalog.local import YAMLFileCatalog
     from fsspec import open_files
     import yaml

     if not isinstance(cat, Catalog):
         raise NotImplementedError

     states = {}
     # The private ``_entries`` mapping is used on purpose: the Entry
     # objects appear to be what is needed here, rather than the
     # public-facing DataSource objects.
     for key, item in cat._entries.items():
         state = item.__getstate__()
         states[key] = state
         captured = [param._captured_init_kwargs
                     for param in item._user_parameters]
         state['parameters'] = captured
         # The parameters now live at the top level of the state, so the
         # copy nested under 'kwargs' is dropped.
         state['kwargs'].pop('parameters')

     target = posixpath.join(path, 'cat.yaml')
     # A single path is passed in, so take the first item of the result.
     handle = open_files([target], 'wt')[0]
     with handle as stream:
         yaml.dump({'sources': states}, stream)
     return YAMLFileCatalog(target)