Example No. 1
def test_remote(intake_server):
    cat_local = intake.open_catalog(cat_file)
    cat_remote = intake.open_catalog(intake_server)
    assert 'outer' in cat_remote
    assert 'outer' in cat_local
    print(tuple(cat_remote))
    print(tuple(cat_remote['outer']()))
    print(tuple(cat_remote['outer']()['circle']()))
    print(cat_remote['outer']()['circle']()['green'])
    print(cat_remote['outer']()['circle']()['green'].read())
Example No. 2
def test_multi_cat_names():
    fn = abspath("catalog_union*.yml")
    cat = open_catalog(fn)
    assert cat.name == fn
    assert fn in repr(cat)

    fn1 = abspath("catalog_union_1.yml")
    fn2 = abspath("catalog_union_2.yml")
    cat = open_catalog([fn1, fn2])
    assert cat.name == '2 files'
Example No. 3
def bundle(request, intake_server, example_data):  # noqa
    tmp_dir = TMP_DIRS[request.param]
    tmp_data_dir = Path(tmp_dir) / 'data'
    serializer_partial = partial(Serializer, tmp_data_dir)
    serializer = serializer_partial()
    uid, docs = example_data
    for name, doc in docs:
        serializer(name, doc)
    serializer.close()

    fullname = os.path.join(tmp_dir, YAML_FILENAME)
    with open(fullname, 'w') as f:
        f.write(f'''
sources:
  xyz:
    description: Some imaginary beamline
    driver: "bluesky-jsonl-catalog"
    container: catalog
    args:
      paths: {tmp_data_dir / "*.jsonl"}
      handler_registry:
        NPY_SEQ: ophyd.sim.NumpySeqHandler
    metadata:
      beamline: "00-ID"
  xyz_with_transforms:
    description: Some imaginary beamline
    driver: "bluesky-jsonl-catalog"
    container: catalog
    args:
      paths: [{tmp_data_dir / "*.jsonl"}]
      handler_registry:
        NPY_SEQ: ophyd.sim.NumpySeqHandler
      transforms:
        start: databroker.tests.test_v2.transform.transform
        stop: databroker.tests.test_v2.transform.transform
        resource: databroker.tests.test_v2.transform.transform
        descriptor: databroker.tests.test_v2.transform.transform
    metadata:
      beamline: "00-ID"
        ''')

    time.sleep(2)
    remote = request.param == 'remote'

    if request.param == 'local':
        cat = intake.open_catalog(os.path.join(tmp_dir, YAML_FILENAME))
    elif request.param == 'remote':
        cat = intake.open_catalog(intake_server, page_size=10)
    else:
        raise ValueError(f"Unknown catalog mode: {request.param!r}")
    return types.SimpleNamespace(cat=cat,
                                 uid=uid,
                                 docs=docs,
                                 remote=remote,
                                 serializer_partial=serializer_partial)
Example No. 4
def test_cat_with_no_declared_name_gets_name_from_dir_if_file_named_catalog():
    fn = abspath("catalog.yml")
    cat = open_catalog(fn,
                       name='name_in_func',
                       description='Description in func')
    assert cat.name == 'name_in_func'
    assert cat.description == 'Description in func'

    cat = open_catalog(fn)
    assert cat.name == 'tests'
    assert cat.description is None
Example No. 5
def test_cat_with_declared_name():
    fn = abspath("catalog_named.yml")
    description = 'Description declared in the open function'
    cat = open_catalog(fn, name='name_in_func', description=description)
    assert cat.name == 'name_in_func'
    assert cat.description == description
    assert cat.metadata.get('some') == 'thing'

    cat = open_catalog(fn)
    assert cat.name == 'name_in_spec'
    assert cat.description == 'This is a catalog with a description in the yaml'
Example No. 6
def test_remote_netcdf(intake_server):
    cat_local = intake.open_catalog(cat_file)
    cat = intake.open_catalog(intake_server)
    assert 'xarray_source' in cat
    source = cat.xarray_source()
    assert isinstance(source._ds, xr.Dataset)
    assert source._schema is None
    source._get_schema()
    assert source._schema is not None
    assert (source.to_dask().rh.data.compute() ==
            cat_local.xarray_source.to_dask().rh.data.compute()).all()
Example No. 7
def main(argv=None):
    from intake.config import conf
    from intake import open_catalog

    if argv is None:
        argv = sys.argv

    parser = argparse.ArgumentParser(description='Intake Catalog Server')
    parser.add_argument('-p', '--port', type=int, default=conf['port'],
                        help='port number for server to listen on')
    parser.add_argument('--list-entries', action='store_true',
                        help='list catalog entries at startup')
    parser.add_argument('--sys-exit-on-sigterm', action='store_true',
                        help='internal flag used during unit testing to ensure '
                             '.coverage file is written')
    parser.add_argument('catalog_args', metavar='FILE', type=str, nargs='+',
                        help='Name of catalog YAML file')
    parser.add_argument('--flatten', dest='flatten', action='store_true')
    parser.add_argument('--no-flatten', dest='flatten', action='store_false')
    parser.set_defaults(flatten=True)
    args = parser.parse_args(argv[1:])

    if args.sys_exit_on_sigterm:
        signal.signal(signal.SIGTERM, call_exit_on_sigterm)

    logger.info('Creating catalog from:')
    for arg in args.catalog_args:
        logger.info('  - %s' % arg)

    catargs = args.catalog_args
    if len(catargs) == 1:
        catalog = open_catalog(catargs[0])
        logger.info("catalog_args: %s" % catargs[0])
    else:
        catalog = open_catalog(catargs, flatten=args.flatten)
        logger.info("catalog_args: %s" % catargs)
    if args.list_entries:
        # This is not a good idea if the Catalog is huge.
        logger.info('Entries:' + ','.join(list(catalog)))

    logger.info('Listening on port %d' % args.port)

    server = IntakeServer(catalog)
    app = server.make_app()
    server.start_periodic_functions(close_idle_after=3600.0)

    app.listen(args.port)
    try:
        tornado.ioloop.IOLoop.current().start()
    except KeyboardInterrupt:
        logger.critical("Exiting")
    except Exception as e:
        logger.critical("Exiting due to %s" % e)
Example No. 8
def test_text_export(temp_cache):
    import tempfile
    outdir = tempfile.mkdtemp()
    cat = intake.open_catalog(os.path.join(here, 'sources.yaml'))
    s = cat.sometext()
    out = s.export(outdir)
    fn = os.path.join(outdir, 'cat.yaml')
    with open(fn, 'w') as f:
        f.write(out.yaml())
    cat = intake.open_catalog(fn)
    s2 = cat[s.name]()
    assert s.read() == s2.read()
Example No. 9
def test_remote_tiff(intake_server):
    pytest.importorskip('rasterio')
    cat_local = intake.open_catalog(cat_file)
    cat = intake.open_catalog(intake_server)
    assert 'tiff_source' in cat
    source = cat.tiff_source()
    assert isinstance(source._ds, xr.Dataset)
    assert source._schema is None
    source._get_schema()
    assert source._schema is not None
    remote = source.to_dask().data.compute()
    local = cat_local.tiff_source.to_dask().data.compute()
    assert (remote == local).all()
Example No. 10
def test_zarr_entries_in_yaml_catalog(temp_zarr):
    import dask.array as da

    # open YAML catalog file
    _, _, _, catalog_file = temp_zarr
    cat = open_catalog(catalog_file)

    # test entries
    assert_items_equal(['root', 'bar', 'eggs'], list(cat))

    # entry pointing to zarr root group
    assert isinstance(cat['root'], ZarrGroupCatalog)
    assert_items_equal(['foo', 'bar', 'baz'], list(cat['root']))
    assert 'catalog' == cat['root'].describe()['container']
    assert isinstance(cat['root'].to_zarr(), zarr.hierarchy.Group)

    # entry pointing to zarr sub-group
    assert isinstance(cat['bar'], ZarrGroupCatalog)
    assert_items_equal(['spam', 'eggs'], list(cat['bar']))
    assert 'catalog' == cat['bar'].describe()['container']
    assert isinstance(cat['bar'].to_zarr(), zarr.hierarchy.Group)

    # entry pointing to zarr array
    assert isinstance(cat['eggs'], ZarrArraySource)
    assert 'ndarray' == cat['eggs'].describe()['container']
    assert isinstance(cat['eggs'].to_dask(), da.Array)
Example No. 11
def test_nested_catalog_access(tmp_path_catalog_nested):
    cat = intake.open_catalog(tmp_path_catalog_nested)
    entry1 = cat.nested.ex1
    entry2 = cat["nested.ex1"]
    entry3 = cat[["nested", "ex1"]]
    entry4 = cat["nested", "ex1"]
    assert entry1 == entry2 == entry3 == entry4
Example No. 12
def test_plot():
    pytest.importorskip("streamz.dataframe")
    pytest.importorskip("hvplot")
    cat = intake.open_catalog(catfile)
    s = cat.df.read()
    pl = s.plot()
    assert "DynamicMap" in str(pl)
Example No. 13
def open_intake_catalog(platform, config):
    """Returns an Intake catalog for a specified platform and config

    Uses the package resources included in the om4labs distribution
    to determine the directory of the intake catalogs, unless it
    is overridden by the "OM4LABS_CATALOG_DIR" environment variable.

    Parameters
    ----------
    platform : str
        Site description, e.g. "gfdl", "orion", "testing"
    config : str
        Model configuration, e.g. "OM4p5", "OM4p25"

    Returns
    -------
    intake.catalog.Catalog
        Intake catalog corresponding to specified platform/config
    """

    catalog_str = f"{config}_catalog_{platform}.yml"

    if "OM4LABS_CATALOG_DIR" in os.environ.keys():
        catfile = f"{os.environ['OM4LABS_CATALOG_DIR']}/{catalog_str}"
    else:
        catfile = pkgr.resource_filename("om4labs", f"catalogs/{catalog_str}")

    cat = intake.open_catalog(catfile)

    return cat
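The helper above resolves the catalog from OM4LABS_CATALOG_DIR when that environment variable is set, and otherwise from the files packaged with om4labs. A minimal usage sketch, assuming a matching "{config}_catalog_{platform}.yml" exists for the (hypothetical) platform/config pair used here:

import os

# Hypothetical pair; it must correspond to a packaged
# "OM4p25_catalog_gfdl.yml" (or one living in OM4LABS_CATALOG_DIR).
cat = open_intake_catalog(platform="gfdl", config="OM4p25")
print(list(cat))  # names of the entries the catalog exposes

# Point the helper at a local catalog directory instead (placeholder path):
os.environ["OM4LABS_CATALOG_DIR"] = "/path/to/catalogs"
cat = open_intake_catalog(platform="gfdl", config="OM4p25")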
Example No. 14
def test_cache_to_cat(tmpdir):
    old = intake.config.conf.copy()
    olddir = intake.config.confdir
    intake.config.confdir = str(tmpdir)
    intake.config.conf.update({
        'cache_dir': 'catdir',
        'cache_download_progress': False,
        'cache_disabled': False
    })
    try:
        fn0 = os.path.join(here, 'calvert_uk.zip')
        fn1 = os.path.join(tmpdir, 'calvert_uk.zip')
        shutil.copy2(fn0, fn1)
        fn0 = os.path.join(here, 'cached.yaml')
        fn1 = os.path.join(tmpdir, 'cached.yaml')
        shutil.copy2(fn0, fn1)
        cat = intake.open_catalog(fn1)
        s = cat.calvert()
        df = s.read()
        assert len(df)
        md = CacheMetadata()
        f = md[s._urlpath][0]
        assert f['cache_path'].startswith(str(tmpdir))
        assert 'intake_cache' in os.listdir(tmpdir)
        assert os.listdir(os.path.join(tmpdir, 'intake_cache'))
    finally:
        intake.config.confdir = olddir
        intake.config.conf.update(old)
Example No. 15
    def _databroker():
        mongo_box = MongoBox()
        try:
            mongo_box.start()
            mongo_client = mongo_box.client()
            mongo_host, mongo_port = mongo_client.address
            mongo_uri = f"mongodb://{mongo_host}:{mongo_port}"
            catalog_descriptor_path = tmp_path / Path("mad.yml")
            with open(catalog_descriptor_path, "w") as f:
                f.write(f"""\
sources:
  mad:
    description: Made up beamline
    driver: "bluesky-mongo-normalized-catalog"
    container: catalog
    args:
      metadatastore_db: {mongo_uri}
      asset_registry_db: {mongo_uri}
      handler_registry:
        NPY_SEQ: ophyd.sim.NumpySeqHandler
    metadata:
      beamline: "00-ID"
""")

            yield intake.open_catalog(catalog_descriptor_path)
        finally:
            mongo_box.stop()
Example No. 16
def test_flatten_duplicate_error():
    path = tempfile.mkdtemp()
    f1 = os.path.join(path, 'catalog.yaml')
    path = tempfile.mkdtemp()
    f2 = os.path.join(path, 'catalog.yaml')
    for f in [f1, f2]:
        with open(f, 'w') as fo:
            fo.write("""
        sources:
          a:
            driver: csv
            args:
              urlpath: /not/a/file
        """)
    with pytest.raises(ValueError):
        open_catalog([f1, f2])
Example No. 17
def test_fsspec_integration():
    import fsspec
    import pandas as pd
    mem = fsspec.filesystem('memory')
    with mem.open('cat.yaml', 'wt') as f:
        f.write("""
sources:
  implicit:
    driver: csv
    description: o
    args:
      urlpath: "{{CATALOG_DIR}}/file.csv"
  explicit:
    driver: csv
    description: o
    args:
      urlpath: "memory:///file.csv"
  extra:
    driver: csv
    description: o
    args:
      urlpath: "{{CATALOG_DIR}}/file.csv"
      storage_options: {other: option}"""
                )
    with mem.open('/file.csv', 'wt') as f:
        f.write("a,b\n0,1")
    expected = pd.DataFrame({'a': [0], 'b': [1]})
    cat = open_catalog("memory://cat.yaml")
    assert list(cat) == ['implicit', 'explicit', 'extra']
    assert cat.implicit.read().equals(expected)
    assert cat.explicit.read().equals(expected)
    s = cat.extra()
    assert s._storage_options['other']
Example No. 18
def fetch_bokeh_sources(catalog_filename):
    """Define Bokeh Data Sources

    """
    catalog = intake.open_catalog(catalog_filename)
    dataframes = {
        'mulch': catalog.solid_waste_mulch.read().pivot(index='Month', columns='Type')
    }
    sources = {
        'mulch': ColumnDataSource(dataframes['mulch']),
    }

    comm_df = catalog.solid_waste_commodity_recycling.read()
    comm_df = comm_df.rename(columns={
        "Magnolia & Alice": "East Knoxville Recycling Center",
        "225 Moody": "South Knoxville Recycling Center",
        "4440 Western Av.": "North Knoxville Recycling Center",
        "341 Parkvillage": "West Knoxville Recycling Center",
        "227 Willow Av.": "Downtown Knoxville Recycling Center",
        "Curbside City-Wide": "Curbside City-Wide Pickup",
        "Downtown": "Downtown Pickup",
        "KPD": "KPD", # what is this?
        "Recycling Trailer": "Recycling Trailer"
    })
    for material in {'Glass', 'Cardboard', 'Mixed Paper', 'Plastics ("Commingled")'}:
        dataframes[f'commodity_{material}'] = comm_df[comm_df.Type == material].fillna(0).pivot(index='Month', columns='Type')
        sources[f'commodity_{material}'] = ColumnDataSource(dataframes[f'commodity_{material}'])

    return {'dataframes': dataframes, 'sources': sources}
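A short sketch of consuming the returned bundle, using only what the function above guarantees: a dict of pandas DataFrames and a dict of Bokeh ColumnDataSource objects keyed by name (the catalog path is a placeholder):

bundle = fetch_bokeh_sources("catalog.yml")  # placeholder catalog file
for key, source in bundle['sources'].items():
    # ColumnDataSource.column_names lists the columns available to glyphs
    print(key, source.column_names)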
Example No. 19
def test_reload_missing_remote_directory(intake_server):
    try:
        shutil.rmtree(TMP_DIR)
    except OSError:
        pass

    time.sleep(1)
    catalog = open_catalog(intake_server, ttl=0.1)
    assert_items_equal(list(catalog), [])

    os.mkdir(TMP_DIR)
    with open(os.path.join(TMP_DIR, YAML_FILENAME), 'w') as f:
        f.write('''
plugins:
  source:
    - module: intake.catalog.tests.example1_source
    - module: intake.catalog.tests.example_plugin_dir.example2_source
sources:
  use_example1:
    description: example1 source plugin
    driver: example1
    args: {}
        ''')
    time.sleep(1.2)

    assert_items_equal(list(catalog), ['use_example1'])
    try:
        shutil.rmtree(TMP_DIR)
    except OSError:
        pass
Example No. 20
def parse(input_bam, virtual_digest_catalog, output_prefix, n_workers,
          chunksize):
    """Filter the read-sorted alignments in INPUT_BAM and save the results under OUTPUT_PREFIX

    """
    from pore_c.analyses.alignments import parse_alignment_bam

    file_paths = catalogs.AlignmentDfCatalog.generate_paths(output_prefix)

    vd_cat = open_catalog(str(virtual_digest_catalog))
    fragment_df = vd_cat.fragments.read()
    final_stats = parse_alignment_bam(
        input_bam,
        fragment_df,
        alignment_table=file_paths["alignment"],
        read_table=file_paths["read"],
        overlap_table=file_paths["overlap"],
        alignment_summary=file_paths["alignment_summary"],
        read_summary=file_paths["read_summary"],
        n_workers=n_workers,
        chunksize=chunksize,
    )
    metadata = {"final_stats": final_stats}
    file_paths["virtual_digest"] = Path(virtual_digest_catalog)
    file_paths["input_bam"] = Path(input_bam)
    adf_cat = catalogs.AlignmentDfCatalog.create(file_paths, metadata, {})
    logger.info(str(adf_cat))
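A hypothetical invocation sketch, assuming parse is callable directly as defined above; every path and tuning value below is a placeholder, not taken from the source:

parse(
    input_bam="aligned_reads.bam",                 # placeholder BAM path
    virtual_digest_catalog="digest.catalog.yaml",  # placeholder catalog
    output_prefix="run01",
    n_workers=4,
    chunksize=10000,
)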
Example No. 21
def test_simple():
    cat = intake.open_catalog(catfile)
    s = cat.simple.read()
    out = s.sink_to_list()
    assert not out
    s.start()
    wait_for(lambda: out == [1, 2, 3], timeout=1)
Example No. 22
def test_parquet(temp_cache):
    inp = pytest.importorskip('intake_parquet')
    cat = intake.open_catalog(
        os.path.abspath(os.path.join(path, 'catalog1.yml')))
    s = cat.entry1()
    s2 = s.persist()
    assert isinstance(s2, inp.ParquetSource)
Example No. 23
def test_multi_cat_names():
    fn = abspath("catalog_union*.yml")
    cat = open_catalog(fn)
    assert cat.name == fn
    assert fn in repr(cat)

    fn1 = abspath("catalog_union_1.yml")
    fn2 = abspath("catalog_union_2.yml")
    cat = open_catalog([fn1, fn2])
    assert cat.name == '2 files'
    assert cat.description == 'Catalog generated from 2 files'

    cat = open_catalog([fn1, fn2], name='special_name',
                       description='Special description')
    assert cat.name == 'special_name'
    assert cat.description == 'Special description'
Example No. 24
def test_reload_updated_config(intake_server_with_config):
    catalog = open_catalog(intake_server_with_config)

    entries = list(catalog)
    assert entries == ['use_example1']

    with open(os.path.join(TMP_DIR, YAML_FILENAME), 'w') as f:
        f.write('''
plugins:
  source:
    - module: intake.catalog.tests.example1_source
    - module: intake.catalog.tests.example_plugin_dir.example2_source
sources:
  use_example1:
    description: example1 source plugin
    driver: example1
    args: {}
  use_example1_1:
    description: example1 other
    driver: example1
    args: {}
        ''')

    time.sleep(2)

    assert_items_equal(list(catalog), ['use_example1', 'use_example1_1'])
Example No. 25
def test_cat_add(tmpdir):
    tmpdir = str(tmpdir)
    fn = os.path.join(tmpdir, 'cat.yaml')
    with open(fn, 'w') as f:
        f.write('sources: {}')
    cat = open_catalog(fn)
    assert list(cat) == []

    # was added in memory
    cat.add(cat)
    cat._load()  # this would happen automatically, but not immediately
    assert list(cat) == ['cat']

    # was added to the file
    cat = open_catalog(fn)
    assert list(cat) == ['cat']
Example No. 26
def test_read_direct(intake_server):
    catalog = open_catalog(intake_server)

    d = catalog['entry1_part'].configure_new(part='2')
    test_dir = os.path.dirname(__file__)
    file2 = os.path.join(test_dir, 'entry1_2.csv')
    expected_df = pd.read_csv(file2)
    meta = expected_df[:0]

    info = d.discover()

    assert info['dtype'] == {
        k: str(v)
        for k, v in meta.dtypes.to_dict().items()
    }
    assert info['npartitions'] == 1
    assert info['shape'] == (None, 3)  # Do not know CSV size ahead of time
    md = info['metadata'].copy()
    md.pop('catalog_dir', None)
    assert md == {'bar': [2, 4, 6], 'foo': 'baz'}

    md = d.metadata.copy()
    md.pop('catalog_dir', None)
    assert md == dict(foo='baz', bar=[2, 4, 6])
    assert d.description == 'entry1 part'
    df = d.read()

    assert expected_df.equals(df)
Example No. 27
def test_dir_cache(tmpdir, temp_cache):
    for d in ['main', 'main/sub1', 'main/sub2']:
        os.makedirs(os.path.join(tmpdir, d))
    for f in [
            'main/afile', 'main/sub1/subfile', 'main/sub2/subfile1',
            'main/sub2/subfile2'
    ]:
        fn = os.path.join(tmpdir, f)
        with open(fn, 'w') as fo:
            fo.write(f)
    fn = os.path.join(tmpdir, 'cached.yaml')
    shutil.copy2(os.path.join(here, 'cached.yaml'), fn)
    cat = intake.open_catalog(fn)
    s = cat.dirs()
    out = s.cache[0].load(s._urlpath, output=False)
    assert out[0] == os.path.join(tmpdir, s.cache[0]._path(s._urlpath))
    with open(os.path.join(out[0], 'afile')) as f:
        assert f.read() == 'main/afile'
    md = CacheMetadata()
    got = md[s._urlpath]

    # Avoid re-copy
    s = cat.dirs()
    s.cache[0].load(s._urlpath, output=False)
    md2 = CacheMetadata()
    got2 = md2[s._urlpath]
    assert got == got2
Example No. 28
def _initializeCatalog(self, **kwargs):
    file_uri: str = self.catURI
    file_exists = os.path.isfile(file_uri)
    print(f"Opening catalog file: {file_uri}")
    self._catalog = intake.open_catalog(file_uri, driver="yaml_file_cat",
                                        autoreload=file_exists, name=self.name,
                                        **kwargs)
    if file_exists:
        self.validate()
    else:
        self.save()
Example No. 29
def test_read_pattern_path_not_as_pattern():
    pytest.importorskip('rasterio')
    cat = intake.open_catalog(os.path.join(here, 'data', 'catalog.yaml'))
    green = cat.pattern_tiff_source_path_not_as_pattern()

    da = green.read()
    assert len(da.band) == 3
Example No. 30
def test_read(intake_server):
    catalog = open_catalog(intake_server)

    d = catalog['entry1']

    test_dir = os.path.dirname(__file__)
    file1 = os.path.join(test_dir, 'entry1_1.csv')
    file2 = os.path.join(test_dir, 'entry1_2.csv')
    expected_df = pd.concat((pd.read_csv(file1), pd.read_csv(file2)))
    meta = expected_df[:0]

    info = d.discover()
    assert info['dtype'] == {
        k: str(v)
        for k, v in meta.dtypes.to_dict().items()
    }
    assert info['npartitions'] == 2
    assert info['shape'] == (None, 3)  # Do not know CSV size ahead of time

    md = d.metadata.copy()
    assert md['foo'] == 'bar'
    assert md['bar'] == [1, 2, 3]

    df = d.read()

    assert expected_df.equals(df)